Version in base suite: 10.11.11-0+deb12u1
Base version: mariadb_10.11.11-0+deb12u1
Target version: mariadb_10.11.13-0+deb12u1
Base file: /srv/ftp-master.debian.org/ftp/pool/main/m/mariadb/mariadb_10.11.11-0+deb12u1.dsc
Target file: /srv/ftp-master.debian.org/policy/pool/main/m/mariadb/mariadb_10.11.13-0+deb12u1.dsc

/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/dictcmp/dictcmp_raw_sampled.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/dictcmp/dictcmp_sst_blocks.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/dictcmp/dictcmp_zstd_trained.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Legacy_Vs_Integrated.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_RW_RO_Perf.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Amp.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/integrated-blob-db/BlobDB_Benchmarks_Write_Perf.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/rocksdb-secondary-cache/Mixgraph_hit_rate.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/rocksdb-secondary-cache/Mixgraph_throughput.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/rocksdb-secondary-cache/arch_diagram.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/rocksdb-secondary-cache/insert_flow.png |binary
/srv/release.debian.org/tmp/EHlq1RjOhk/mariadb-10.11.13/storage/rocksdb/rocksdb/docs/static/images/rocksdb-secondary-cache/lookup_flow.png |binary
mariadb-10.11.13/CMakeLists.txt | 10
mariadb-10.11.13/Docs/INFO_SRC | 10
mariadb-10.11.13/VERSION | 2
mariadb-10.11.13/appveyor.yml | 38
mariadb-10.11.13/client/mysql_upgrade.c | 14
mariadb-10.11.13/client/mysqlbinlog.cc | 61
mariadb-10.11.13/client/mysqldump.c | 15
mariadb-10.11.13/client/mysqlslap.c | 12
mariadb-10.11.13/client/mysqltest.cc | 16
mariadb-10.11.13/cmake/cpack_rpm.cmake | 4
mariadb-10.11.13/cmake/libfmt.cmake | 5
mariadb-10.11.13/cmake/os/Windows.cmake | 520
mariadb-10.11.13/cmake/os/WindowsCache.cmake | 19
mariadb-10.11.13/cmake/pcre.cmake | 12
mariadb-10.11.13/cmake/plugin.cmake | 5
mariadb-10.11.13/config.h.cmake | 48
mariadb-10.11.13/debian/changelog | 39
mariadb-10.11.13/debian/mariadb-server-core.postinst | 49
mariadb-10.11.13/debian/patches/fix-reproducible-builds-rocksdb.patch | 26
mariadb-10.11.13/debian/patches/fix-spelling-rocksdb.patch | 38
mariadb-10.11.13/debian/patches/rocksdb-kfreebsd.patch | 150
mariadb-10.11.13/debian/patches/series | 3
mariadb-10.11.13/debian/salsa-ci-enable-sec-and-update-repos.sh | 12
mariadb-10.11.13/debian/salsa-ci.yml | 11
mariadb-10.11.13/debian/tests/traces/mariadb-verbose-help.expected | 5
mariadb-10.11.13/debian/tests/traces/mariadbd-verbose-help.expected | 21
mariadb-10.11.13/extra/mariabackup/backup_mysql.cc | 2
mariadb-10.11.13/extra/mariabackup/common_engine.cc | 6
mariadb-10.11.13/extra/mariabackup/innobackupex.cc | 7
mariadb-10.11.13/extra/mariabackup/write_filt.cc | 12
mariadb-10.11.13/extra/mariabackup/xtrabackup.cc | 80
mariadb-10.11.13/include/json_lib.h | 5
mariadb-10.11.13/include/my_base.h | 5
mariadb-10.11.13/include/my_cpu.h | 7
mariadb-10.11.13/include/my_stack_alloc.h | 2
mariadb-10.11.13/include/my_sys.h | 8
mariadb-10.11.13/include/my_virtual_mem.h | 37
mariadb-10.11.13/include/source_revision.h | 2
mariadb-10.11.13/include/sslopt-longopts.h | 3
mariadb-10.11.13/libmariadb/CMakeLists.txt | 2
mariadb-10.11.13/libmariadb/include/errmsg.h | 3
mariadb-10.11.13/libmariadb/include/ma_context.h | 25
mariadb-10.11.13/libmariadb/include/mariadb_com.h | 22
mariadb-10.11.13/libmariadb/libmariadb/CMakeLists.txt | 6
mariadb-10.11.13/libmariadb/libmariadb/ma_context.c | 38
mariadb-10.11.13/libmariadb/libmariadb/ma_errmsg.c | 2
mariadb-10.11.13/libmariadb/libmariadb/mariadb_lib.c | 9
mariadb-10.11.13/libmariadb/libmariadb/mariadb_stmt.c | 6
mariadb-10.11.13/libmariadb/plugins/pvio/pvio_socket.c | 10
mariadb-10.11.13/libmariadb/unittest/libmariadb/connection.c | 83
mariadb-10.11.13/libmariadb/unittest/libmariadb/errors.c | 74
mariadb-10.11.13/libmariadb/unittest/libmariadb/ps_bugs.c | 52
mariadb-10.11.13/mysql-test/CMakeLists.txt | 2
mariadb-10.11.13/mysql-test/include/long_test.inc | 2
mariadb-10.11.13/mysql-test/lib/My/SafeProcess.pm | 3
mariadb-10.11.13/mysql-test/lib/My/SafeProcess/safe_process.cc | 17
mariadb-10.11.13/mysql-test/main/backup_locks.test | 1
mariadb-10.11.13/mysql-test/main/comment_database.result | 13
mariadb-10.11.13/mysql-test/main/comment_database.test | 8
mariadb-10.11.13/mysql-test/main/ctype_utf8_def_upgrade.result | 2
mariadb-10.11.13/mysql-test/main/derived_cond_pushdown.result | 194
mariadb-10.11.13/mysql-test/main/derived_cond_pushdown.test | 22
mariadb-10.11.13/mysql-test/main/derived_view.result | 2
mariadb-10.11.13/mysql-test/main/func_json.result | 37
mariadb-10.11.13/mysql-test/main/func_json.test | 22
mariadb-10.11.13/mysql-test/main/func_like.result | 19
mariadb-10.11.13/mysql-test/main/func_like.test | 15
mariadb-10.11.13/mysql-test/main/func_regexp_pcre.result | 28
mariadb-10.11.13/mysql-test/main/func_regexp_pcre.test | 2
mariadb-10.11.13/mysql-test/main/gis-precise.result | 8
mariadb-10.11.13/mysql-test/main/gis-precise.test | 8
mariadb-10.11.13/mysql-test/main/gis.result | 32
mariadb-10.11.13/mysql-test/main/gis.test | 32
mariadb-10.11.13/mysql-test/main/group_by.result | 74
mariadb-10.11.13/mysql-test/main/group_by.test | 22
mariadb-10.11.13/mysql-test/main/group_min_max.result | 24
mariadb-10.11.13/mysql-test/main/group_min_max.test | 36
mariadb-10.11.13/mysql-test/main/insert.result | 72
mariadb-10.11.13/mysql-test/main/insert.test | 56
mariadb-10.11.13/mysql-test/main/insert_returning.result | 2
mariadb-10.11.13/mysql-test/main/insert_returning.test | 2
mariadb-10.11.13/mysql-test/main/insert_select.result | 135
mariadb-10.11.13/mysql-test/main/insert_select.test | 56
mariadb-10.11.13/mysql-test/main/join.result | 29
mariadb-10.11.13/mysql-test/main/join.test | 25
mariadb-10.11.13/mysql-test/main/join_cache.result | 26
mariadb-10.11.13/mysql-test/main/join_cache.test | 27
mariadb-10.11.13/mysql-test/main/join_nested.result | 12
mariadb-10.11.13/mysql-test/main/join_nested.test | 13
mariadb-10.11.13/mysql-test/main/join_nested_jcl6.result | 12
mariadb-10.11.13/mysql-test/main/large_pages.opt | 2
mariadb-10.11.13/mysql-test/main/large_pages.result | 1
mariadb-10.11.13/mysql-test/main/large_pages.test | 4
mariadb-10.11.13/mysql-test/main/long_unique.result | 22
mariadb-10.11.13/mysql-test/main/long_unique.test | 22
mariadb-10.11.13/mysql-test/main/lowercase_table2.result | 2
mariadb-10.11.13/mysql-test/main/lowercase_view.result | 12
mariadb-10.11.13/mysql-test/main/lowercase_view.test | 12
mariadb-10.11.13/mysql-test/main/mariadb-upgrade-service.result | 35
mariadb-10.11.13/mysql-test/main/mariadb-upgrade-service.test | 113
mariadb-10.11.13/mysql-test/main/mdev-35721-ubsan.result | 21
mariadb-10.11.13/mysql-test/main/mdev-35721-ubsan.test | 22
mariadb-10.11.13/mysql-test/main/mdl_sync.result | 5
mariadb-10.11.13/mysql-test/main/mdl_sync.test | 8
mariadb-10.11.13/mysql-test/main/merge.result | 17
mariadb-10.11.13/mysql-test/main/merge.test | 17
mariadb-10.11.13/mysql-test/main/multi_update.result | 20
mariadb-10.11.13/mysql-test/main/multi_update.test | 28
mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.opt | 1
mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.result | 8
mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.test | 8
mariadb-10.11.13/mysql-test/main/myisam-big.result | 8
mariadb-10.11.13/mysql-test/main/myisam-big.test | 13
mariadb-10.11.13/mysql-test/main/mysql-interactive.result | 4
mariadb-10.11.13/mysql-test/main/mysql-interactive.test | 11
mariadb-10.11.13/mysql-test/main/mysql_upgrade-34014.result | 2
mariadb-10.11.13/mysql-test/main/mysql_upgrade.result | 23
mariadb-10.11.13/mysql-test/main/mysql_upgrade.test | 27
mariadb-10.11.13/mysql-test/main/mysqld--help.result | 3
mariadb-10.11.13/mysql-test/main/mysqldump-system.result | 6
mariadb-10.11.13/mysql-test/main/mysqldump.result | 33
mariadb-10.11.13/mysql-test/main/mysqldump.test | 11
mariadb-10.11.13/mysql-test/main/mysqlslap.result | 3
mariadb-10.11.13/mysql-test/main/mysqlslap.test | 6
mariadb-10.11.13/mysql-test/main/mysqltest.result | 9
mariadb-10.11.13/mysql-test/main/mysqltest.test | 6
mariadb-10.11.13/mysql-test/main/partition_myisam.result | 21
mariadb-10.11.13/mysql-test/main/partition_myisam.test | 28
mariadb-10.11.13/mysql-test/main/query_cache.result | 23
mariadb-10.11.13/mysql-test/main/query_cache.test | 22
mariadb-10.11.13/mysql-test/main/range_notembedded.result | 67
mariadb-10.11.13/mysql-test/main/range_notembedded.test | 48
mariadb-10.11.13/mysql-test/main/secondary_key_costs.result | 76
mariadb-10.11.13/mysql-test/main/secondary_key_costs.test | 37
mariadb-10.11.13/mysql-test/main/skip_grants.result | 8
mariadb-10.11.13/mysql-test/main/skip_grants.test | 11
mariadb-10.11.13/mysql-test/main/sp-bugs.result | 9
mariadb-10.11.13/mysql-test/main/sp-bugs.test | 20
mariadb-10.11.13/mysql-test/main/sp-row.result | 41
mariadb-10.11.13/mysql-test/main/sp-row.test | 61
mariadb-10.11.13/mysql-test/main/subselect.result | 20
mariadb-10.11.13/mysql-test/main/subselect.test | 10
mariadb-10.11.13/mysql-test/main/subselect_elimination.result | 12
mariadb-10.11.13/mysql-test/main/subselect_elimination.test | 7
mariadb-10.11.13/mysql-test/main/subselect_no_exists_to_in.result | 20
mariadb-10.11.13/mysql-test/main/subselect_no_mat.result | 20
mariadb-10.11.13/mysql-test/main/subselect_no_opts.result | 20
mariadb-10.11.13/mysql-test/main/subselect_no_scache.result | 20
mariadb-10.11.13/mysql-test/main/subselect_no_semijoin.result | 20
mariadb-10.11.13/mysql-test/main/temp_table_frm.result | 6
mariadb-10.11.13/mysql-test/main/temp_table_frm.test | 13
mariadb-10.11.13/mysql-test/main/timezone.test | 2
mariadb-10.11.13/mysql-test/main/trigger_null.result | 15
mariadb-10.11.13/mysql-test/main/trigger_null.test | 11
mariadb-10.11.13/mysql-test/main/type_binary.result | 58
mariadb-10.11.13/mysql-test/main/type_binary.test | 11
mariadb-10.11.13/mysql-test/main/type_blob.result | 190
mariadb-10.11.13/mysql-test/main/type_blob.test | 45
mariadb-10.11.13/mysql-test/main/type_num_innodb.result | 128
mariadb-10.11.13/mysql-test/main/type_varbinary.result | 42
mariadb-10.11.13/mysql-test/main/type_varbinary.test | 10
mariadb-10.11.13/mysql-test/main/update.result | 80
mariadb-10.11.13/mysql-test/main/update.test | 40
mariadb-10.11.13/mysql-test/main/userstat.result | 7
mariadb-10.11.13/mysql-test/main/userstat.test | 7
mariadb-10.11.13/mysql-test/main/view.result | 49
mariadb-10.11.13/mysql-test/main/view.test | 30
mariadb-10.11.13/mysql-test/main/view_grant.result | 46
mariadb-10.11.13/mysql-test/main/view_grant.test | 47
mariadb-10.11.13/mysql-test/mariadb-test-run.pl | 36
mariadb-10.11.13/mysql-test/std_data/galera_certs/galera.root.crt | 24
mariadb-10.11.13/mysql-test/suite/archive/archive-big.test | 3
mariadb-10.11.13/mysql-test/suite/atomic/README.txt | 2
mariadb-10.11.13/mysql-test/suite/atomic/alter_table.inc | 198
mariadb-10.11.13/mysql-test/suite/atomic/alter_table.opt | 1
mariadb-10.11.13/mysql-test/suite/atomic/alter_table.result | 3135 -----
mariadb-10.11.13/mysql-test/suite/atomic/alter_table.test | 198
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_aria.test | 2
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.opt | 1
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.result | 1396 ++
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.test | 7
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_myisam.result | 1741 +++
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_myisam.test | 6
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_rocksdb.test | 2
mariadb-10.11.13/mysql-test/suite/atomic/alter_table_trigger.test | 2
mariadb-10.11.13/mysql-test/suite/atomic/create_table.test | 1
mariadb-10.11.13/mysql-test/suite/atomic/drop_table.test | 1
mariadb-10.11.13/mysql-test/suite/atomic/rename_table.test | 1
mariadb-10.11.13/mysql-test/suite/binlog/r/binlog_commit_fail.result | 116
mariadb-10.11.13/mysql-test/suite/binlog/r/binlog_mysqlbinlog_warn_stop_position.result | 45
mariadb-10.11.13/mysql-test/suite/binlog/t/binlog_commit_fail.test | 135
mariadb-10.11.13/mysql-test/suite/binlog/t/binlog_mysqlbinlog_warn_stop_position.test | 13
mariadb-10.11.13/mysql-test/suite/binlog_encryption/encrypted_master.test | 1
mariadb-10.11.13/mysql-test/suite/binlog_encryption/rpl_parallel_innodb_lock_conflict.result | 7
mariadb-10.11.13/mysql-test/suite/encryption/r/doublewrite_debug.result | 24
mariadb-10.11.13/mysql-test/suite/encryption/t/doublewrite_debug.opt | 2
mariadb-10.11.13/mysql-test/suite/encryption/t/doublewrite_debug.test | 30
mariadb-10.11.13/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt | 2
mariadb-10.11.13/mysql-test/suite/engines/iuds/r/insert_time.result | 4
mariadb-10.11.13/mysql-test/suite/federated/federatedx.result | 2
mariadb-10.11.13/mysql-test/suite/federated/federatedx.test | 2
mariadb-10.11.13/mysql-test/suite/federated/federatedx_create_handlers.result | 4
mariadb-10.11.13/mysql-test/suite/federated/federatedx_create_handlers.test | 11
mariadb-10.11.13/mysql-test/suite/funcs_2/t/innodb_charset.test | 2
mariadb-10.11.13/mysql-test/suite/galera/disabled.def | 6
mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_master.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_replica_2primary.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_slave.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/galera_3nodes_as_slave.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera/galera_4nodes.cnf | 8
mariadb-10.11.13/mysql-test/suite/galera/include/auto_increment_offset_save.inc | 1
mariadb-10.11.13/mysql-test/suite/galera/include/galera_dump_sr_table.inc | 1
mariadb-10.11.13/mysql-test/suite/galera/include/galera_st_shutdown_slave.inc | 1
mariadb-10.11.13/mysql-test/suite/galera/include/galera_start_replication.inc | 4
mariadb-10.11.13/mysql-test/suite/galera/include/galera_wsrep_recover.inc | 4
mariadb-10.11.13/mysql-test/suite/galera/include/wait_condition_with_debug_and_kill.inc | 35
mariadb-10.11.13/mysql-test/suite/galera/r/GAL-401.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-20225.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-20793.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-21479.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-25389.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-26266.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-33136.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-34647.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-35748.result | 31
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-35946.result | 16
mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-36116.result | 22
mariadb-10.11.13/mysql-test/suite/galera/r/MW-284.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/MW-329.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/MW-329F.result | 25
mariadb-10.11.13/mysql-test/suite/galera/r/MW-416.result | 5
mariadb-10.11.13/mysql-test/suite/galera/r/galera_2primary_replica.result | 5
mariadb-10.11.13/mysql-test/suite/galera/r/galera_alter_engine_myisam.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_applier_ftwrl_table_alter.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_as_slave_nonprim.result | 3
mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_group_commit.result | 685 -
mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_lock_table.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_kill,debug.rdiff | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_kill_debug.result | 5
mariadb-10.11.13/mysql-test/suite/galera/r/galera_binlog_checksum.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_binlog_stmt_autoinc.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_circular_replication.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ddl_fk_conflict.result | 3
mariadb-10.11.13/mysql-test/suite/galera/r/galera_defaults.result | 3
mariadb-10.11.13/mysql-test/suite/galera/r/galera_gcs_fragment.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_inject_bf_long_wait.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump,debug.rdiff | 15
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump,release.rdiff | 15
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump.result | 10
mariadb-10.11.13/mysql-test/suite/galera/r/galera_nonPK_and_PA.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_parallel_apply_lock_table.result | 7
mariadb-10.11.13/mysql-test/suite/galera/r/galera_parallel_simple.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_partitioned_tables.result | 176
mariadb-10.11.13/mysql-test/suite/galera/r/galera_restart_replica.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequence_engine.result | 8
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences,binlogoff.rdiff | 11
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences.result | 16
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences_bf_kill.result | 152
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences_transaction.result | 350
mariadb-10.11.13/mysql-test/suite/galera/r/galera_slave_replay.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_split_brain.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl.result | 3
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_cipher.result | 30
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_compression.result | 3
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_upgrade.result | 3
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup,debug.rdiff | 6
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_force_recovery,debug.rdiff | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid,debug.rdiff | 210
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid.result | 534
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_logarchive,debug.rdiff | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump,debug.rdiff | 15
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump,release.rdiff | 15
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,debug.rdiff | 15
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,release.rdiff | 15
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync,debug.rdiff | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_gtid,debug.rdiff | 210
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_gtid.result | 534
mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_recv_auto,debug.rdiff | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_strict_require_innodb.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_strict_require_primary_key.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/galera_toi_ddl_nonconflicting.result | 80
mariadb-10.11.13/mysql-test/suite/galera/r/galera_var_replicate_myisam_on.result | 27
mariadb-10.11.13/mysql-test/suite/galera/r/galera_var_slave_threads.result | 1
mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_during_ist.result | 112
mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_joined_apply.result | 94
mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_joined_skip.result | 102
mariadb-10.11.13/mysql-test/suite/galera/r/galera_wan.result | 12
mariadb-10.11.13/mysql-test/suite/galera/r/galera_wsrep_provider_options_syntax.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result | 9
mariadb-10.11.13/mysql-test/suite/galera/r/mdev-29775.result | 84
mariadb-10.11.13/mysql-test/suite/galera/r/mdev-30653.result | 2
mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#198.result | 3
mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33,debug.rdiff | 12
mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33,release.rdiff | 15
mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33.result | 4
mariadb-10.11.13/mysql-test/suite/galera/r/wsrep_mode_strict_replication.result | 2
mariadb-10.11.13/mysql-test/suite/galera/suite.pm | 80
mariadb-10.11.13/mysql-test/suite/galera/t/GAL-401.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/GCF-939.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-10715.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-15443.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-18832.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-20225.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-20793.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-21479.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-22227.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-22708.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-24143.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-24327.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-25389.test | 7
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-26266.test | 3
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-26597.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27001.opt | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27001.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27123.opt | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27862.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-28053.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-29293.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-29512.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-32549.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33136.test | 5
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33828.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33828.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-34647.cnf | 13
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-34647.test | 3
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35748.opt | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35748.test | 22
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35946.test | 39
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-36116.test | 43
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-6860.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-6860.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MW-259.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MW-284.test | 7
mariadb-10.11.13/mysql-test/suite/galera/t/MW-313.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera/t/MW-329.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera/t/MW-329.test | 10
mariadb-10.11.13/mysql-test/suite/galera/t/MW-329F.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera/t/MW-329F.test | 105
mariadb-10.11.13/mysql-test/suite/galera/t/MW-360-master.opt | 1
mariadb-10.11.13/mysql-test/suite/galera/t/MW-369.inc | 2
mariadb-10.11.13/mysql-test/suite/galera/t/MW-416.test | 73
mariadb-10.11.13/mysql-test/suite/galera/t/MW-86-wait8.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/binlog_checksum.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/create.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera#414.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera#500.test | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_2primary_replica.test | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_MDEV-29512.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_alter_engine_myisam.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.test | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_as_slave_ctas.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_as_slave_nonprim.test | 11
mariadb-10.11.13/mysql-test/suite/galera/t/galera_autoinc_sst_mariabackup.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_backup_stage.test | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_flush_for_export.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_lock_table.test | 11
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_background_statistics.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_background_statistics.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_kill.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_kill_debug.test | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_lock_wait.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_checksum.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_checksum.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_max.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_row_image.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_stmt_autoinc.test | 8
mariadb-10.11.13/mysql-test/suite/galera/t/galera_cache_index.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_can_run_toi.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_change_user.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_circular_replication.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_concurrent_ctas.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_create_trigger.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ctas.test | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_fk_conflict.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_fk_conflict.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_multiline.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_defaults.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_defaults.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_disallow_local_gtid.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_encrypt_tmp_files.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_fk_truncate.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera/t/galera_flush_local.test | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_forced_binlog_format.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcs_fragment.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcs_max_packet_size.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_server_id.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.test | 15
mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_trx_conflict.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_inject_bf_long_wait.test | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_insert_ignore.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_insert_multi.test | 7
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_MDEV-28423.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_MDEV-28583.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup_innodb_flush_logs.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup_verify_ca.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mysqldump.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mysqldump.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_progress.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_recv_bind.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_restart_joiner.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_rsync.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_rsync_verify_ca.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_kill_applier.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_kill_smallchanges.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_load_data.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_log_bin_opt.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_many_rows.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_13787.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_13787.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_15611.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdl_race.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_nonPK_and_PA.test | 7
mariadb-10.11.13/mysql-test/suite/galera/t/galera_nopk_unicode.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_apply_lock_table.test | 11
mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_autoinc_largetrx.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_autoinc_manytrx.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_simple.test | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_partitioned_tables.test | 133
mariadb-10.11.13/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_pc_recovery.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache_invalidate.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache_sync_wait.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_read_only.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_repl_key_format_flat16.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_nochanges.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_replica.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_replica.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_savepoint_replay.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequence_engine.test | 13
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.combinations | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.test | 27
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.combinations | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.test | 115
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.combinations | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.test | 255
mariadb-10.11.13/mysql-test/suite/galera/t/galera_server.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_set_position_after_cert_failure.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_slave_replay.test | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sp_bf_abort.inc | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_split_brain.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sql_log_bin_zero.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_cipher.cnf | 11
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_cipher.test | 82
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_compression.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_compression.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_upgrade.test | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_encrypted.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_data_dir.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_force_recovery.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.cnf | 28
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.test | 29
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_logarchive.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_lost_found.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_table_options.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mysqldump.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mysqldump_with_key.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync2.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_binlogname.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_data_dir.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_capath.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_key.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_server.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_gtid.cnf | 23
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_gtid.test | 29
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_logbasename.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_recv_auto.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_cluster.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_local_index.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_local_state.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_strict_require_innodb.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_strict_require_primary_key.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_suspend_slave.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sync_wait_upto-master.opt | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sync_wait_upto.test | 3
mariadb-10.11.13/mysql-test/suite/galera/t/galera_table_with_hyphen.inc | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_temporary_sequences.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_threadpool.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_threadpool.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_toi_ddl_nonconflicting.test | 60
mariadb-10.11.13/mysql-test/suite/galera/t/galera_toi_ftwrl.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_transaction_read_only.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_udf.cnf | 7
mariadb-10.11.13/mysql-test/suite/galera/t/galera_unicode_identifiers.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_v1_row_events.cnf | 7
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_OSU_method2.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_auto_inc_control_on.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_ignore_apply_errors.test | 39
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_notify_ssl_ipv6.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_aria_off.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_aria_on.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_myisam_off.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_myisam_on.test | 86
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_slave_threads.test | 17
mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_wsrep_mode.test | 6
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_during_ist.cnf | 20
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_during_ist.test | 165
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_apply.cnf | 21
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_apply.test | 73
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_begin.inc | 79
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_end.inc | 33
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_skip.cnf | 21
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_skip.test | 100
mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan.test | 12
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan_restart_ist.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan_restart_sst.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_log_conficts.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_mode.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_provider_options_syntax.test | 5
mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test | 14
mariadb-10.11.13/mysql-test/suite/galera/t/mdev-29775.test | 81
mariadb-10.11.13/mysql-test/suite/galera/t/mdev-30653.test | 4
mariadb-10.11.13/mysql-test/suite/galera/t/mdev-31285.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#198.cnf | 7
mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#198.test | 11
mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#201.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#247.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#31.test | 2
mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#33.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#332.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/rename.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/view.test | 1
mariadb-10.11.13/mysql-test/suite/galera/t/wsrep_mode_strict_replication.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/disabled.def | 3
mariadb-10.11.13/mysql-test/suite/galera_3nodes/galera_2x3nodes.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/galera_3nodes.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/MDEV-36360.result | 61
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera-features#115.result | 41
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_2_cluster.result | 35
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_garbd.result | 8
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result | 8
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result | 8
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_A.result | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_B.result | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_C.result | 8
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_parallel_apply_3nodes.result | 2
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_pc_weight.result | 10
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_safe_to_bootstrap.result | 4
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_sst_donor_non_prim.result | 26
mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_vote_rejoin_mysqldump.result | 2
mariadb-10.11.13/mysql-test/suite/galera_3nodes/suite.pm | 82
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/GAL-501.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/GCF-354.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/MDEV-36360.test | 110
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#115.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#115.test | 89
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#119.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.cnf | 9
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.combinations | 5
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.test | 75
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_allowlist.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_certification_ccc.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_duplicate_primary_value.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_evs_suspect_timeout.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_garbd.test | 11
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test | 28
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.cnf | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.test | 80
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup_section.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync_section.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_A.test | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_B.test | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_C.test | 8
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_parallel_apply_3nodes.test | 2
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_pc_bootstrap.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_pc_weight.test | 10
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_safe_to_bootstrap.test | 5
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ssl_reload.cnf | 6
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.cnf | 4
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.test | 64
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_vote_rejoin_mysqldump.test | 2
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema_init.test | 2
mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/inconsistency_shutdown.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result | 10
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/r/galera_sr_kill_slave_before_apply.result | 2
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/suite.pm | 39
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-606.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-817.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-832.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/MDEV-26707.test | 8
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_isolate_master.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_join_slave.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_master.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback2.test | 2
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_before_apply.test | 3
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.test | 1
mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr-master.opt | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/disabled.def | 4
mariadb-10.11.13/mysql-test/suite/galera_sr/r/MENT-2042.result | 9
mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_cc_master.result | 2
mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_kill_all_norecovery.result | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_myisam.result | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/r/mysql-wsrep-features#148.result | 2
mariadb-10.11.13/mysql-test/suite/galera_sr/suite.pm | 80
mariadb-10.11.13/mysql-test/suite/galera_sr/t/MDEV-27615.test | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/t/MDEV-28971.test | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/t/MENT-2042.test | 23
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_cc_master.test | 2
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_gtid-master.opt | 2
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.cnf | 3
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.test | 3
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_slave.cnf | 2
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_myisam.test | 2
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.cnf | 5
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.test | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_shutdown_slave.test | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_small_gcache.cnf | 7
mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#14.test | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#148.test | 2
mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#22.test | 1
mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#96.test | 4
mariadb-10.11.13/mysql-test/suite/gcol/r/innodb_virtual_basic.result | 2
mariadb-10.11.13/mysql-test/suite/gcol/r/innodb_virtual_stats.result | 52
mariadb-10.11.13/mysql-test/suite/gcol/t/innodb_virtual_basic.test | 37
mariadb-10.11.13/mysql-test/suite/innodb/r/alter_copy_bulk.result | 21
mariadb-10.11.13/mysql-test/suite/innodb/r/alter_partitioned_debug.result | 26
mariadb-10.11.13/mysql-test/suite/innodb/r/autoinc_persist,desc.rdiff | 91
mariadb-10.11.13/mysql-test/suite/innodb/r/autoinc_persist.result | 37
mariadb-10.11.13/mysql-test/suite/innodb/r/buf_pool_resize_oom.result | 8
mariadb-10.11.13/mysql-test/suite/innodb/r/doublewrite.result | 18
mariadb-10.11.13/mysql-test/suite/innodb/r/foreign_key.result | 21
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb-index-online.result | 26
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result | 4
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result | 47
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result | 14
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result | 25
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result | 26
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_bug52663.result | 4
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_row_lock_time_ms.result | 40
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_stats_auto_recalc_on_nonexistent.result | 6
mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_stats_fetch.result | 2
mariadb-10.11.13/mysql-test/suite/innodb/r/instant_alter_debug,redundant.rdiff | 7
mariadb-10.11.13/mysql-test/suite/innodb/r/instant_alter_debug.result | 11
mariadb-10.11.13/mysql-test/suite/innodb/r/lock_isolation.result | 88
mariadb-10.11.13/mysql-test/suite/innodb/r/lock_memory_debug.result | 2
mariadb-10.11.13/mysql-test/suite/innodb/r/log_upgrade_101_flags.result | 2
mariadb-10.11.13/mysql-test/suite/innodb/r/mem_pressure,32bit.rdiff | 11
mariadb-10.11.13/mysql-test/suite/innodb/r/mem_pressure.result | 33
mariadb-10.11.13/mysql-test/suite/innodb/r/page_cleaner.result | 15
mariadb-10.11.13/mysql-test/suite/innodb/r/recovery_memory.result | 2
mariadb-10.11.13/mysql-test/suite/innodb/r/restart,16k.rdiff | 16
mariadb-10.11.13/mysql-test/suite/innodb/r/restart,32k.rdiff | 16
mariadb-10.11.13/mysql-test/suite/innodb/r/restart,4k.rdiff | 16
mariadb-10.11.13/mysql-test/suite/innodb/r/restart,64k.rdiff | 16
mariadb-10.11.13/mysql-test/suite/innodb/r/restart,8k.rdiff | 16
mariadb-10.11.13/mysql-test/suite/innodb/r/restart.result | 13
mariadb-10.11.13/mysql-test/suite/innodb/r/stat_tables.result | 10
mariadb-10.11.13/mysql-test/suite/innodb/r/stats_persistent.result | 10
mariadb-10.11.13/mysql-test/suite/innodb/t/alter_copy_bulk.test | 21
mariadb-10.11.13/mysql-test/suite/innodb/t/alter_partitioned_debug.test | 42
mariadb-10.11.13/mysql-test/suite/innodb/t/autoinc_persist.test | 21
mariadb-10.11.13/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt | 1
mariadb-10.11.13/mysql-test/suite/innodb/t/buf_pool_resize_oom.test | 27
mariadb-10.11.13/mysql-test/suite/innodb/t/doublewrite.combinations | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/doublewrite.test | 45
mariadb-10.11.13/mysql-test/suite/innodb/t/foreign_key.test | 30
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-index-online.opt | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-index-online.test | 25
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-table-online-master.opt | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt | 1
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test | 4
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt | 1
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test | 73
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test | 28
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt | 1
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt | 1
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test | 35
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt | 3
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test | 61
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_bug52663.test | 4
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_row_lock_time_ms.test | 45
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_stats_auto_recalc_on_nonexistent.test | 14
mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_stats_fetch.test | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/instant_alter_debug.test | 12
mariadb-10.11.13/mysql-test/suite/innodb/t/lock_isolation.test | 134
mariadb-10.11.13/mysql-test/suite/innodb/t/lock_memory_debug.opt | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/lock_memory_debug.test | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/log_upgrade_101_flags.test | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/mdev-15707.opt | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/mem_pressure.opt | 3
mariadb-10.11.13/mysql-test/suite/innodb/t/mem_pressure.test | 36
mariadb-10.11.13/mysql-test/suite/innodb/t/page_cleaner.test | 25
mariadb-10.11.13/mysql-test/suite/innodb/t/purge_secondary.opt | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/recovery_memory.test | 6
mariadb-10.11.13/mysql-test/suite/innodb/t/restart.opt | 2
mariadb-10.11.13/mysql-test/suite/innodb/t/restart.test | 25
mariadb-10.11.13/mysql-test/suite/innodb/t/stat_tables.test | 9
mariadb-10.11.13/mysql-test/suite/innodb/t/stats_persistent.test | 12
mariadb-10.11.13/mysql-test/suite/innodb/t/update_time-master.opt | 1
mariadb-10.11.13/mysql-test/suite/innodb_fts/r/index_table.result | 3
mariadb-10.11.13/mysql-test/suite/innodb_fts/r/innodb_ft_aux_table.result | 9
mariadb-10.11.13/mysql-test/suite/innodb_fts/t/index_table.test | 6
mariadb-10.11.13/mysql-test/suite/innodb_fts/t/innodb_ft_aux_table.test | 9
mariadb-10.11.13/mysql-test/suite/innodb_gis/r/rollback.result | 13
mariadb-10.11.13/mysql-test/suite/innodb_gis/t/rollback.test | 13
mariadb-10.11.13/mysql-test/suite/innodb_gis/t/rtree_purge.test | 1
mariadb-10.11.13/mysql-test/suite/json/r/json_no_table.result | 2
mariadb-10.11.13/mysql-test/suite/mariabackup/full_backup.result | 5
mariadb-10.11.13/mysql-test/suite/mariabackup/full_backup.test | 3
mariadb-10.11.13/mysql-test/suite/mariabackup/incremental_compressed.result | 3
mariadb-10.11.13/mysql-test/suite/mariabackup/incremental_compressed.test | 12
mariadb-10.11.13/mysql-test/suite/mariabackup/log_page_corruption.test | 2
mariadb-10.11.13/mysql-test/suite/mariabackup/partial.result | 4
mariadb-10.11.13/mysql-test/suite/mariabackup/partial_exclude.result | 2
mariadb-10.11.13/mysql-test/suite/mariabackup/partition_notwin.result | 11
mariadb-10.11.13/mysql-test/suite/mariabackup/partition_notwin.test | 25
mariadb-10.11.13/mysql-test/suite/mariabackup/unsupported_redo.result | 4
mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.opt | 1
mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.result | 18
mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.test | 38
mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.cnf | 13
mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.result | 45
mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.test | 83
mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_innodb.test | 1
mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_memory.test | 1
mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_myisam.test | 1
mariadb-10.11.13/mysql-test/suite/perfschema/r/threads_innodb.result | 10
mariadb-10.11.13/mysql-test/suite/perfschema/t/threads_innodb.test | 2
mariadb-10.11.13/mysql-test/suite/plugins/r/server_audit.result | 3
mariadb-10.11.13/mysql-test/suite/plugins/t/server_audit.test | 4
mariadb-10.11.13/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result | 2
mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_create_select_row.result | 158
mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_gtid_crash.result | 2
mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_master_pos_wait.result | 3
mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_parallel_innodb_lock_conflict.result | 7
mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_semi_sync_master_disable_with_slave.result | 41
mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_semi_sync_ssl_stop.result | 53
mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_xa_2pc_multi_engine.result | 26
mariadb-10.11.13/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test | 6
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_create_select_row.test | 161
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt | 2
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_gtid_crash.test | 2
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_heartbeat_basic.test | 1
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_master_pos_wait.test | 1
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_parallel_innodb_lock_conflict.test | 12
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_row_drop_create_temp_table.test | 1
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync.test | 1
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync.test | 1
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_row.test | 1
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_master_disable_with_slave.test | 68
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test | 100
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_typeconv.test | 1
mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_xa_2pc_multi_engine.test | 63
mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.opt | 1
mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.result | 26
mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.test | 19
mariadb-10.11.13/mysql-test/suite/sql_sequence/grant.result | 47
mariadb-10.11.13/mysql-test/suite/sql_sequence/grant.test | 50
mariadb-10.11.13/mysql-test/suite/sql_sequence/gtid.result | 2
mariadb-10.11.13/mysql-test/suite/sql_sequence/other.result | 1
mariadb-10.11.13/mysql-test/suite/sql_sequence/other.test | 1
mariadb-10.11.13/mysql-test/suite/sql_sequence/replication.result | 2
mariadb-10.11.13/mysql-test/suite/sql_sequence/view.test | 1
mariadb-10.11.13/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result | 30
mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff | 125
mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_innodb.result | 46
mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result | 4
mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result | 4
mariadb-10.11.13/mysql-test/suite/sys_vars/r/wsrep_forced_binlog_format_basic.result | 51
mariadb-10.11.13/mysql-test/suite/sys_vars/r/wsrep_replicate_myisam_basic.result | 15
mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt | 1
mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt | 1
mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test | 37
mariadb-10.11.13/mysql-test/suite/sys_vars/t/sysvars_innodb.opt | 4
mariadb-10.11.13/mysql-test/suite/sys_vars/t/sysvars_innodb.test | 4
mariadb-10.11.13/mysql-test/suite/sys_vars/t/wsrep_forced_binlog_format_basic.test | 46
mariadb-10.11.13/mysql-test/suite/sys_vars/t/wsrep_replicate_myisam_basic.test | 19
mariadb-10.11.13/mysql-test/suite/versioning/r/partition.result | 35
mariadb-10.11.13/mysql-test/suite/versioning/t/partition.test | 43
mariadb-10.11.13/mysql-test/suite/wsrep/README | 1
mariadb-10.11.13/mysql-test/suite/wsrep/include/check_galera_version.inc | 1
mariadb-10.11.13/mysql-test/suite/wsrep/r/plugin.result | 2
mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-gtid-nobinlog.result | 18
mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-gtid.result | 65
mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-v25,binlogon.rdiff | 2
mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep_forced_binlog_format.result | 51
mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep_mixed_case_cmd_arg.result | 8
mariadb-10.11.13/mysql-test/suite/wsrep/suite.pm | 6
mariadb-10.11.13/mysql-test/suite/wsrep/t/binlog_format.cnf | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/foreign_key.test | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/mdev_10186.test | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/mdev_7798.cnf | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/plugin.test | 2
mariadb-10.11.13/mysql-test/suite/wsrep/t/pool_of_threads.test | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/variables.test | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/variables_debug.test | 3
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.cnf | 10
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.test | 28
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid.cnf | 14
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid.test | 73
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover.cnf | 2
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.cnf | 7
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.test | 48
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.cnf | 6
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.opt | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.test | 11
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_rpl.test | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_variables_sst_method.test | 1
mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_variables_wsrep_off.cnf | 1
mariadb-10.11.13/mysys/CMakeLists.txt | 5
mariadb-10.11.13/mysys/mf_keycache.c | 9
mariadb-10.11.13/mysys/my_default.c | 3
mariadb-10.11.13/mysys/my_getopt.c | 3
mariadb-10.11.13/mysys/my_largepage.c | 111
mariadb-10.11.13/mysys/my_pread.c | 9
mariadb-10.11.13/mysys/my_virtual_mem.c | 201
mariadb-10.11.13/plugin/auth_examples/auth_0x0100.c | 4
mariadb-10.11.13/plugin/server_audit/server_audit.c | 12
mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6.result | 23
mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6.test | 12
mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_engines.inc | 13
mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result | 12
mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_memory.result | 12
mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result | 12
mariadb-10.11.13/plugin/userstat/client_stats.cc | 4
mariadb-10.11.13/plugin/versioning/versioning.cc | 1
mariadb-10.11.13/scripts/mysqlhotcopy.sh | 2
mariadb-10.11.13/scripts/wsrep_sst_common.sh | 13
mariadb-10.11.13/scripts/wsrep_sst_mariabackup.sh | 2
mariadb-10.11.13/scripts/wsrep_sst_mysqldump.sh | 4
mariadb-10.11.13/scripts/wsrep_sst_rsync.sh | 2
mariadb-10.11.13/sql/filesort.cc | 49
mariadb-10.11.13/sql/ha_partition.cc | 40
mariadb-10.11.13/sql/ha_sequence.cc | 6
mariadb-10.11.13/sql/ha_sequence.h | 3
mariadb-10.11.13/sql/handle_connections_win.cc | 3
mariadb-10.11.13/sql/handler.cc | 71
mariadb-10.11.13/sql/handler.h | 4
mariadb-10.11.13/sql/item.cc | 14
mariadb-10.11.13/sql/item.h | 47
mariadb-10.11.13/sql/item_cmpfunc.h | 30
mariadb-10.11.13/sql/item_func.cc | 10
mariadb-10.11.13/sql/item_func.h | 7
mariadb-10.11.13/sql/item_geofunc.cc | 26
mariadb-10.11.13/sql/item_jsonfunc.cc | 114
mariadb-10.11.13/sql/item_strfunc.cc | 10
mariadb-10.11.13/sql/item_subselect.cc | 24
mariadb-10.11.13/sql/item_subselect.h | 1
mariadb-10.11.13/sql/lex_string.h | 2
mariadb-10.11.13/sql/log.cc | 41
mariadb-10.11.13/sql/log.h | 1
mariadb-10.11.13/sql/mysql_install_db.cc | 23
mariadb-10.11.13/sql/mysql_upgrade_service.cc | 129
mariadb-10.11.13/sql/mysqld.cc | 41
mariadb-10.11.13/sql/mysqld.h | 1
mariadb-10.11.13/sql/net_serv.cc | 25
mariadb-10.11.13/sql/opt_range.cc | 91
mariadb-10.11.13/sql/opt_range.h | 29
mariadb-10.11.13/sql/rpl_injector.h | 1
mariadb-10.11.13/sql/rpl_mi.cc | 67
mariadb-10.11.13/sql/rpl_mi.h | 11
mariadb-10.11.13/sql/rpl_parallel.cc | 16
mariadb-10.11.13/sql/semisync_master.cc | 4
mariadb-10.11.13/sql/semisync_slave.cc | 9
mariadb-10.11.13/sql/semisync_slave.h | 2
mariadb-10.11.13/sql/signal_handler.cc | 2
mariadb-10.11.13/sql/slave.cc | 72
mariadb-10.11.13/sql/sp_head.cc | 12
mariadb-10.11.13/sql/sql_acl.cc | 17
mariadb-10.11.13/sql/sql_base.cc | 257
mariadb-10.11.13/sql/sql_base.h | 21
mariadb-10.11.13/sql/sql_cache.cc | 2
mariadb-10.11.13/sql/sql_class.cc | 18
mariadb-10.11.13/sql/sql_class.h | 5
mariadb-10.11.13/sql/sql_cmd.h | 1
mariadb-10.11.13/sql/sql_db.cc | 46
mariadb-10.11.13/sql/sql_db.h | 4
mariadb-10.11.13/sql/sql_error.cc | 22
mariadb-10.11.13/sql/sql_insert.cc | 145
mariadb-10.11.13/sql/sql_insert.h | 2
mariadb-10.11.13/sql/sql_lex.cc | 45
mariadb-10.11.13/sql/sql_lex.h | 6
mariadb-10.11.13/sql/sql_parse.cc | 19
mariadb-10.11.13/sql/sql_prepare.cc | 4
mariadb-10.11.13/sql/sql_priv.h | 1
mariadb-10.11.13/sql/sql_reload.cc | 2
mariadb-10.11.13/sql/sql_select.cc | 164
mariadb-10.11.13/sql/sql_show.cc | 31
mariadb-10.11.13/sql/sql_statistics.cc | 11
mariadb-10.11.13/sql/sql_string.h | 2
mariadb-10.11.13/sql/sql_table.cc | 85
mariadb-10.11.13/sql/sql_trigger.cc | 7
mariadb-10.11.13/sql/sql_truncate.cc | 35
mariadb-10.11.13/sql/sql_update.cc | 5
mariadb-10.11.13/sql/sql_view.cc | 15
mariadb-10.11.13/sql/sql_yacc.yy | 2
mariadb-10.11.13/sql/structs.h | 2
mariadb-10.11.13/sql/sys_vars.cc | 11
mariadb-10.11.13/sql/table.cc | 48
mariadb-10.11.13/sql/table.h | 13
mariadb-10.11.13/sql/vers_string.h | 2
mariadb-10.11.13/sql/wsrep_applier.cc | 15
mariadb-10.11.13/sql/wsrep_client_service.cc | 6
mariadb-10.11.13/sql/wsrep_high_priority_service.cc | 4
mariadb-10.11.13/sql/wsrep_mysqld.cc | 114
mariadb-10.11.13/sql/wsrep_mysqld.h | 3
mariadb-10.11.13/sql/wsrep_server_service.cc | 1
mariadb-10.11.13/sql/wsrep_sst.cc | 11
mariadb-10.11.13/sql/wsrep_thd.h | 66
mariadb-10.11.13/sql/wsrep_trans_observer.h | 15
mariadb-10.11.13/sql/wsrep_var.cc | 55
mariadb-10.11.13/sql/wsrep_var.h | 3
mariadb-10.11.13/sql/wsrep_xid.cc | 43
mariadb-10.11.13/sql/wsrep_xid.h | 4
mariadb-10.11.13/sql/yy_mariadb.cc | 2
mariadb-10.11.13/sql/yy_oracle.cc | 2
mariadb-10.11.13/storage/connect/CMakeLists.txt | 6
mariadb-10.11.13/storage/connect/connect.cc | 8
mariadb-10.11.13/storage/connect/plgxml.h | 4
mariadb-10.11.13/storage/connect/tabxml.cpp | 3
mariadb-10.11.13/storage/connect/user_connect.cc | 19
mariadb-10.11.13/storage/federatedx/federatedx_io.cc | 1
mariadb-10.11.13/storage/federatedx/ha_federatedx.cc | 23
mariadb-10.11.13/storage/innobase/CMakeLists.txt | 1
mariadb-10.11.13/storage/innobase/btr/btr0sea.cc | 104
mariadb-10.11.13/storage/innobase/buf/buf0buddy.cc | 327
mariadb-10.11.13/storage/innobase/buf/buf0buf.cc | 2299 +---
mariadb-10.11.13/storage/innobase/buf/buf0dblwr.cc | 73
mariadb-10.11.13/storage/innobase/buf/buf0dump.cc | 8
mariadb-10.11.13/storage/innobase/buf/buf0flu.cc | 239
mariadb-10.11.13/storage/innobase/buf/buf0lru.cc | 139
mariadb-10.11.13/storage/innobase/buf/buf0rea.cc | 7
mariadb-10.11.13/storage/innobase/dict/dict0defrag_bg.cc | 116
mariadb-10.11.13/storage/innobase/dict/dict0dict.cc | 244
mariadb-10.11.13/storage/innobase/dict/dict0load.cc | 2
mariadb-10.11.13/storage/innobase/dict/dict0stats.cc | 730 -
mariadb-10.11.13/storage/innobase/dict/dict0stats_bg.cc | 22
mariadb-10.11.13/storage/innobase/fsp/fsp0fsp.cc | 33
mariadb-10.11.13/storage/innobase/fts/fts0config.cc | 2
mariadb-10.11.13/storage/innobase/fts/fts0fts.cc | 13
mariadb-10.11.13/storage/innobase/fts/fts0opt.cc | 2
mariadb-10.11.13/storage/innobase/gis/gis0sea.cc | 24
mariadb-10.11.13/storage/innobase/handler/ha_innodb.cc | 1556 +-
mariadb-10.11.13/storage/innobase/handler/ha_innodb.h | 3
mariadb-10.11.13/storage/innobase/handler/handler0alter.cc | 138
mariadb-10.11.13/storage/innobase/handler/i_s.cc | 130
mariadb-10.11.13/storage/innobase/ibuf/ibuf0ibuf.cc | 30
mariadb-10.11.13/storage/innobase/include/btr0sea.h | 10
mariadb-10.11.13/storage/innobase/include/buf0buddy.h | 40
mariadb-10.11.13/storage/innobase/include/buf0buf.h | 446
mariadb-10.11.13/storage/innobase/include/buf0buf.inl | 2
mariadb-10.11.13/storage/innobase/include/buf0dblwr.h | 3
mariadb-10.11.13/storage/innobase/include/buf0lru.h | 4
mariadb-10.11.13/storage/innobase/include/dict0dict.h | 53
mariadb-10.11.13/storage/innobase/include/dict0dict.inl | 4
mariadb-10.11.13/storage/innobase/include/dict0mem.h | 105
mariadb-10.11.13/storage/innobase/include/dict0stats.h | 141
mariadb-10.11.13/storage/innobase/include/dict0stats.inl | 219
mariadb-10.11.13/storage/innobase/include/fil0fil.h | 9
mariadb-10.11.13/storage/innobase/include/fsp0fsp.h | 6
mariadb-10.11.13/storage/innobase/include/ibuf0ibuf.h | 10
mariadb-10.11.13/storage/innobase/include/log0log.h | 191
mariadb-10.11.13/storage/innobase/include/log0recv.h | 12
mariadb-10.11.13/storage/innobase/include/mtr0mtr.h | 9
mariadb-10.11.13/storage/innobase/include/os0file.h | 2
mariadb-10.11.13/storage/innobase/include/row0row.h | 16
mariadb-10.11.13/storage/innobase/include/row0row.inl | 49
mariadb-10.11.13/storage/innobase/include/row0sel.h | 5
mariadb-10.11.13/storage/innobase/include/srv0srv.h | 21
mariadb-10.11.13/storage/innobase/include/trx0trx.h | 26
mariadb-10.11.13/storage/innobase/include/trx0types.h | 9
mariadb-10.11.13/storage/innobase/include/ut0new.h | 1
mariadb-10.11.13/storage/innobase/lock/lock0lock.cc | 72
mariadb-10.11.13/storage/innobase/log/log0crypt.cc | 2
mariadb-10.11.13/storage/innobase/log/log0log.cc | 283
mariadb-10.11.13/storage/innobase/log/log0recv.cc | 159
mariadb-10.11.13/storage/innobase/mtr/mtr0mtr.cc | 272
mariadb-10.11.13/storage/innobase/os/os0file.cc | 22
mariadb-10.11.13/storage/innobase/pars/pars0pars.cc | 5
mariadb-10.11.13/storage/innobase/row/row0ins.cc | 151
mariadb-10.11.13/storage/innobase/row/row0log.cc | 13
mariadb-10.11.13/storage/innobase/row/row0mysql.cc | 20
mariadb-10.11.13/storage/innobase/row/row0purge.cc | 2
mariadb-10.11.13/storage/innobase/row/row0sel.cc | 120
mariadb-10.11.13/storage/innobase/row/row0uins.cc | 10
mariadb-10.11.13/storage/innobase/row/row0umod.cc | 7
mariadb-10.11.13/storage/innobase/row/row0upd.cc | 4
mariadb-10.11.13/storage/innobase/srv/srv0mon.cc | 17
mariadb-10.11.13/storage/innobase/srv/srv0srv.cc | 35
mariadb-10.11.13/storage/innobase/srv/srv0start.cc | 46
mariadb-10.11.13/storage/innobase/trx/trx0purge.cc | 83
mariadb-10.11.13/storage/innobase/trx/trx0rec.cc | 26
mariadb-10.11.13/storage/innobase/trx/trx0trx.cc | 5
mariadb-10.11.13/storage/innobase/ut/ut0rnd.cc | 2
mariadb-10.11.13/storage/maria/ma_control_file.c | 45
mariadb-10.11.13/storage/maria/ma_pagecache.c | 8
mariadb-10.11.13/storage/maria/ma_unique.c | 6
mariadb-10.11.13/storage/mroonga/CMakeLists.txt | 2
mariadb-10.11.13/storage/mroonga/ha_mroonga.cpp | 8
mariadb-10.11.13/storage/mroonga/vendor/groonga/CMakeLists.txt | 2
mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/db.c | 4
mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/hash.c | 14
mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/ii.c | 4
mariadb-10.11.13/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt | 2
mariadb-10.11.13/storage/myisam/mi_unique.c | 6
mariadb-10.11.13/storage/rocksdb/build_rocksdb.cmake | 112
mariadb-10.11.13/storage/rocksdb/ha_rocksdb.cc | 11
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result | 10
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result | 10
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result | 2
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result | 2
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result | 10
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc | 15
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result | 20
mariadb-10.11.13/storage/rocksdb/rdb_i_s.cc | 5
mariadb-10.11.13/storage/rocksdb/rdb_source_revision.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/cat_ignore_eagain | 54
mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/config.yml | 872 +
mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/ubsan_suppression_list.txt | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/vs2015_install.ps1 | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/vs2017_install.ps1 | 35
mariadb-10.11.13/storage/rocksdb/rocksdb/.github/workflows/sanity_check.yml | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/.gitignore | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/.travis.yml | 256
mariadb-10.11.13/storage/rocksdb/rocksdb/CMakeLists.txt | 523
mariadb-10.11.13/storage/rocksdb/rocksdb/DEFAULT_OPTIONS_HISTORY.md | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/HISTORY.md | 836 +
mariadb-10.11.13/storage/rocksdb/rocksdb/INSTALL.md | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/LANGUAGE-BINDINGS.md | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/Makefile | 1873 ++-
mariadb-10.11.13/storage/rocksdb/rocksdb/PLUGINS.md | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/README.md | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/TARGETS | 1090 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/USERS.md | 30
mariadb-10.11.13/storage/rocksdb/rocksdb/WINDOWS_PORT.md | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/appveyor.yml | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/buckify_rocksdb.py | 142
mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/check_buck_targets.sh | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/targets_builder.py | 64
mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/targets_cfg.py | 125
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/build_detect_platform | 282
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/check-sources.sh | 36
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/dependencies_platform009.sh | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/fbcode_config.sh | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/fbcode_config4.8.1.sh | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/fbcode_config_platform007.sh | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/fbcode_config_platform009.sh | 179
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/format-diff.sh | 148
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/gnu_parallel | 52
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/make_package.sh | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/regression_build_test.sh | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/rocksdb-lego-determinator | 1063 +
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/run_ci_db_test.ps1 | 30
mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/setup_centos7.sh | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache.cc | 72
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_bench.cc | 275
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_bench_tool.cc | 794 +
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_roles.cc | 70
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_roles.h | 134
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_stats.h | 183
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_helpers.h | 125
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_key.cc | 271
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_key.h | 132
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_reservation_manager.cc | 188
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_reservation_manager.h | 191
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc | 506
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_test.cc | 129
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/clock_cache.cc | 180
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache.cc | 496
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache.h | 192
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache_test.cc | 1660 +++
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/sharded_cache.cc | 112
mariadb-10.11.13/storage/rocksdb/rocksdb/cache/sharded_cache.h | 75
mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in | 51
mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake | 7
mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Findgflags.cmake | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Findsnappy.cmake | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Finduring.cmake | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/coverage/coverage_test.sh | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/coverage/parse_gcov_output.py | 22
mariadb-10.11.13/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc | 115
mariadb-10.11.13/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.h | 45
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_constants.h | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator.h | 146
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc | 326
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_fetcher.cc | 34
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_fetcher.h | 37
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition.cc | 156
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition.h | 67
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc | 210
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder.cc | 375
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder.h | 103
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc | 672 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache.cc | 102
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache.h | 52
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc | 268
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h | 101
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_garbage.cc | 134
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_garbage.h | 57
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc | 173
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_meta.cc | 62
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_meta.h | 170
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader.cc | 582 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader.h | 106
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc | 974 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc | 100
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.h | 102
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc | 196
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_index.h | 187
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_format.cc | 145
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_format.h | 149
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc | 132
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h | 83
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_writer.cc | 172
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_writer.h | 83
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc | 1026 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc | 718 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc | 82
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_index_test.cc | 572 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h | 38
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob_index.h | 179
mariadb-10.11.13/storage/rocksdb/rocksdb/db/builder.cc | 335
mariadb-10.11.13/storage/rocksdb/rocksdb/db/builder.h | 56
mariadb-10.11.13/storage/rocksdb/rocksdb/db/c.cc | 1248 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/c_test.c | 1230 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family.cc | 338
mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family.h | 176
mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family_test.cc | 264
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compact_files_test.cc | 126
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compacted_db_impl.cc | 160
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compacted_db_impl.h | 113
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/clipping_iterator.h | 275
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc | 258
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction.cc | 140
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction.h | 49
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.cc | 853 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.h | 289
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc | 500
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job.cc | 1893 ++-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job.h | 253
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc | 61
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job_test.cc | 478
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker.cc | 115
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc | 182
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc | 179
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc | 1111 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc | 437
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_service_test.cc | 825 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/file_pri.h | 92
mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/sst_partitioner.cc | 90
mariadb-10.11.13/storage/rocksdb/rocksdb/db/comparator_db_test.cc | 8
mariadb-10.11.13/storage/rocksdb/rocksdb/db/convenience.cc | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/db/corruption_test.cc | 541
mariadb-10.11.13/storage/rocksdb/rocksdb/db/cuckoo_table_db_test.cc | 65
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_basic_test.cc | 2583 +++-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_blob_index_test.cc | 436
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_block_cache_test.cc | 1228 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_bloom_filter_test.cc | 952 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_compaction_filter_test.cc | 289
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_compaction_test.cc | 3058 ++++-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_dynamic_level_test.cc | 85
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_encryption_test.cc | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_filesnapshot.cc | 413
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_flush_test.cc | 1958 +++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc | 173
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h | 118
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl.cc | 1790 ++-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl.h | 559 -
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc | 1399 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc | 72
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_files.cc | 465
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_open.cc | 858 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc | 103
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc | 245
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_write.cc | 601 -
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_secondary_test.cc | 869 -
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_info_dumper.cc | 63
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_info_dumper.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_inplace_update_test.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_io_failure_test.cc | 135
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_iter.cc | 686 -
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_iter.h | 91
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_iter_stress_test.cc | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_iter_test.cc | 581 -
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_iterator_test.cc | 402
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_kv_checksum_test.cc | 197
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_log_iter_test.cc | 127
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc | 513
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_memtable_test.cc | 89
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_merge_operand_test.cc | 264
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_merge_operator_test.cc | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_options_test.cc | 473
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_properties_test.cc | 520
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_range_del_test.cc | 407
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_secondary_test.cc | 1260 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_sst_test.cc | 640 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_statistics_test.cc | 72
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_table_properties_test.cc | 347
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_tailing_iter_test.cc | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_test.cc | 1522 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_test2.cc | 2788 ++++-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_test_util.cc | 344
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_test_util.h | 475
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_universal_compaction_test.cc | 383
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_wal_test.cc | 1151 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc | 3217 +++++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc | 121
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc | 793 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_write_test.cc | 199
mariadb-10.11.13/storage/rocksdb/rocksdb/db/dbformat.cc | 81
mariadb-10.11.13/storage/rocksdb/rocksdb/db/dbformat.h | 233
mariadb-10.11.13/storage/rocksdb/rocksdb/db/dbformat_test.cc | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/db/deletefile_test.cc | 108
mariadb-10.11.13/storage/rocksdb/rocksdb/db/error_handler.cc | 504
mariadb-10.11.13/storage/rocksdb/rocksdb/db/error_handler.h | 68
mariadb-10.11.13/storage/rocksdb/rocksdb/db/error_handler_fs_test.cc | 2663 ++++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/error_handler_test.cc | 871 -
mariadb-10.11.13/storage/rocksdb/rocksdb/db/event_helpers.cc | 167
mariadb-10.11.13/storage/rocksdb/rocksdb/db/event_helpers.h | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/db/external_sst_file_basic_test.cc | 732 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc | 389
mariadb-10.11.13/storage/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/db/external_sst_file_test.cc | 531
mariadb-10.11.13/storage/rocksdb/rocksdb/db/fault_injection_test.cc | 116
mariadb-10.11.13/storage/rocksdb/rocksdb/db/filename_test.cc | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/db/flush_job.cc | 688 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/flush_job.h | 70
mariadb-10.11.13/storage/rocksdb/rocksdb/db/flush_job_test.cc | 358
mariadb-10.11.13/storage/rocksdb/rocksdb/db/flush_scheduler.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/db/forward_iterator.cc | 99
mariadb-10.11.13/storage/rocksdb/rocksdb/db/forward_iterator.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/db/forward_iterator_bench.cc | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/db/import_column_family_job.cc | 80
mariadb-10.11.13/storage/rocksdb/rocksdb/db/import_column_family_job.h | 22
mariadb-10.11.13/storage/rocksdb/rocksdb/db/import_column_family_test.cc | 151
mariadb-10.11.13/storage/rocksdb/rocksdb/db/internal_stats.cc | 559 -
mariadb-10.11.13/storage/rocksdb/rocksdb/db/internal_stats.h | 167
mariadb-10.11.13/storage/rocksdb/rocksdb/db/job_context.h | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/db/kv_checksum.h | 394
mariadb-10.11.13/storage/rocksdb/rocksdb/db/listener_test.cc | 681 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/log_reader.cc | 64
mariadb-10.11.13/storage/rocksdb/rocksdb/db/log_reader.h | 7
mariadb-10.11.13/storage/rocksdb/rocksdb/db/log_test.cc | 171
mariadb-10.11.13/storage/rocksdb/rocksdb/db/log_writer.cc | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/db/log_writer.h | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/db/logs_with_prep_tracker.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/db/lookup_key.h | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/db/malloc_stats.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/db/manual_compaction_test.cc | 209
mariadb-10.11.13/storage/rocksdb/rocksdb/db/memtable.cc | 353
mariadb-10.11.13/storage/rocksdb/rocksdb/db/memtable.h | 163
mariadb-10.11.13/storage/rocksdb/rocksdb/db/memtable_list.cc | 439
mariadb-10.11.13/storage/rocksdb/rocksdb/db/memtable_list.h | 109
mariadb-10.11.13/storage/rocksdb/rocksdb/db/memtable_list_test.cc | 247
mariadb-10.11.13/storage/rocksdb/rocksdb/db/merge_context.h | 28
mariadb-10.11.13/storage/rocksdb/rocksdb/db/merge_helper.cc | 97
mariadb-10.11.13/storage/rocksdb/rocksdb/db/merge_helper.h | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/db/merge_helper_test.cc | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/db/merge_test.cc | 247
mariadb-10.11.13/storage/rocksdb/rocksdb/db/obsolete_files_test.cc | 161
mariadb-10.11.13/storage/rocksdb/rocksdb/db/options_file_test.cc | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/db/output_validator.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/db/output_validator.h | 48
mariadb-10.11.13/storage/rocksdb/rocksdb/db/perf_context_test.cc | 108
mariadb-10.11.13/storage/rocksdb/rocksdb/db/periodic_work_scheduler.cc | 117
mariadb-10.11.13/storage/rocksdb/rocksdb/db/periodic_work_scheduler.h | 78
mariadb-10.11.13/storage/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc | 236
mariadb-10.11.13/storage/rocksdb/rocksdb/db/pinned_iterators_manager.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/db/plain_table_db_test.cc | 148
mariadb-10.11.13/storage/rocksdb/rocksdb/db/pre_release_callback.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/db/prefix_test.cc | 75
mariadb-10.11.13/storage/rocksdb/rocksdb/db/range_del_aggregator.cc | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/db/range_del_aggregator.h | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/db/range_del_aggregator_bench.cc | 22
mariadb-10.11.13/storage/rocksdb/rocksdb/db/range_del_aggregator_test.cc | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc | 22
mariadb-10.11.13/storage/rocksdb/rocksdb/db/range_tombstone_fragmenter.h | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/db/read_callback.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/db/repair.cc | 227
mariadb-10.11.13/storage/rocksdb/rocksdb/db/repair_test.cc | 173
mariadb-10.11.13/storage/rocksdb/rocksdb/db/snapshot_impl.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/db/table_cache.cc | 268
mariadb-10.11.13/storage/rocksdb/rocksdb/db/table_cache.h | 121
mariadb-10.11.13/storage/rocksdb/rocksdb/db/table_properties_collector.cc | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/db/table_properties_collector.h | 88
mariadb-10.11.13/storage/rocksdb/rocksdb/db/table_properties_collector_test.cc | 91
mariadb-10.11.13/storage/rocksdb/rocksdb/db/transaction_log_impl.cc | 19
mariadb-10.11.13/storage/rocksdb/rocksdb/db/transaction_log_impl.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_builder.cc | 1324 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_builder.h | 49
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_builder_test.cc | 1452 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_edit.cc | 366
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_edit.h | 241
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_edit_handler.cc | 980 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_edit_handler.h | 309
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_edit_test.cc | 373
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_set.cc | 2965 ++---
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_set.h | 477
mariadb-10.11.13/storage/rocksdb/rocksdb/db/version_set_test.cc | 2140 +++
mariadb-10.11.13/storage/rocksdb/rocksdb/db/wal_edit.cc | 204
mariadb-10.11.13/storage/rocksdb/rocksdb/db/wal_edit.h | 166
mariadb-10.11.13/storage/rocksdb/rocksdb/db/wal_edit_test.cc | 214
mariadb-10.11.13/storage/rocksdb/rocksdb/db/wal_manager.cc | 78
mariadb-10.11.13/storage/rocksdb/rocksdb/db/wal_manager.h | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/db/wal_manager_test.cc | 99
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_batch.cc | 963 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_batch_internal.h | 181
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_batch_test.cc | 414
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_callback_test.cc | 501
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_controller.cc | 101
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_controller.h | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_controller_test.cc | 328
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_thread.cc | 31
mariadb-10.11.13/storage/rocksdb/rocksdb/db/write_thread.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc | 67
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc | 138
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_common.h | 125
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h | 90
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc | 74
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc | 289
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc | 148
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h | 64
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.cc | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h | 207
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h | 65
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc | 1122 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h | 28
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc | 144
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/expected_state.cc | 616 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/expected_state.h | 287
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc | 1037 +
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h | 302
mariadb-10.11.13/storage/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc | 282
mariadb-10.11.13/storage/rocksdb/rocksdb/defs.bzl | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/Gemfile | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/Gemfile.lock | 331
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_config.yml | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_data/authors.yml | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_data/nav.yml | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_docs/getting-started.md | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_includes/doc.html | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2015-10-27-getthreadlist.markdown | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2017-08-24-pinnableslice.markdown | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2017-08-25-flushwal.markdown | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2021-04-12-universal-improvements.markdown | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2021-05-26-integrated-blob-db.markdown | 101
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2021-05-26-online-validation.markdown | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2021-05-27-rocksdb-secondary-cache.markdown | 195
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2021-05-31-dictionary-compression.markdown | 157
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_posts/2021-12-29-ribbon-filter.markdown | 281
mariadb-10.11.13/storage/rocksdb/rocksdb/docs/_top-level/support.md | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/env/composite_env.cc | 464
mariadb-10.11.13/storage/rocksdb/rocksdb/env/composite_env_wrapper.h | 1101 --
mariadb-10.11.13/storage/rocksdb/rocksdb/env/emulated_clock.h | 114
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env.cc | 960 +
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_basic_test.cc | 204
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_chroot.cc | 349
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_chroot.h | 35
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_encryption.cc | 1385 +-
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_encryption_ctr.h | 116
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_hdfs.cc | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_posix.cc | 340
mariadb-10.11.13/storage/rocksdb/rocksdb/env/env_test.cc | 1320 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/env/file_system.cc | 189
mariadb-10.11.13/storage/rocksdb/rocksdb/env/file_system_tracer.cc | 519
mariadb-10.11.13/storage/rocksdb/rocksdb/env/file_system_tracer.h | 447
mariadb-10.11.13/storage/rocksdb/rocksdb/env/fs_posix.cc | 416
mariadb-10.11.13/storage/rocksdb/rocksdb/env/fs_readonly.h | 107
mariadb-10.11.13/storage/rocksdb/rocksdb/env/fs_remap.cc | 306
mariadb-10.11.13/storage/rocksdb/rocksdb/env/fs_remap.h | 139
mariadb-10.11.13/storage/rocksdb/rocksdb/env/io_posix.cc | 518
mariadb-10.11.13/storage/rocksdb/rocksdb/env/io_posix.h | 141
mariadb-10.11.13/storage/rocksdb/rocksdb/env/io_posix_test.cc | 140
mariadb-10.11.13/storage/rocksdb/rocksdb/env/mock_env.cc | 742 -
mariadb-10.11.13/storage/rocksdb/rocksdb/env/mock_env.h | 191
mariadb-10.11.13/storage/rocksdb/rocksdb/env/mock_env_test.cc | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/env/unique_id_gen.cc | 164
mariadb-10.11.13/storage/rocksdb/rocksdb/env/unique_id_gen.h | 71
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/CMakeLists.txt | 45
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/Makefile | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/c_simple_example.c | 25
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/column_families_example.cc | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/compact_files_example.cc | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/compaction_filter_example.cc | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/multi_processes_example.cc | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/optimistic_transaction_example.cc | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/options_file_example.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/simple_example.cc | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/examples/transaction_example.cc | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/file/delete_scheduler.cc | 103
mariadb-10.11.13/storage/rocksdb/rocksdb/file/delete_scheduler.h | 27
mariadb-10.11.13/storage/rocksdb/rocksdb/file/delete_scheduler_test.cc | 53
mariadb-10.11.13/storage/rocksdb/rocksdb/file/file_prefetch_buffer.cc | 70
mariadb-10.11.13/storage/rocksdb/rocksdb/file/file_prefetch_buffer.h | 118
mariadb-10.11.13/storage/rocksdb/rocksdb/file/file_util.cc | 208
mariadb-10.11.13/storage/rocksdb/rocksdb/file/file_util.h | 71
mariadb-10.11.13/storage/rocksdb/rocksdb/file/filename.cc | 114
mariadb-10.11.13/storage/rocksdb/rocksdb/file/filename.h | 48
mariadb-10.11.13/storage/rocksdb/rocksdb/file/line_file_reader.cc | 68
mariadb-10.11.13/storage/rocksdb/rocksdb/file/line_file_reader.h | 59
mariadb-10.11.13/storage/rocksdb/rocksdb/file/prefetch_test.cc | 1004 +
mariadb-10.11.13/storage/rocksdb/rocksdb/file/random_access_file_reader.cc | 363
mariadb-10.11.13/storage/rocksdb/rocksdb/file/random_access_file_reader.h | 120
mariadb-10.11.13/storage/rocksdb/rocksdb/file/random_access_file_reader_test.cc | 483
mariadb-10.11.13/storage/rocksdb/rocksdb/file/read_write_util.cc | 41
mariadb-10.11.13/storage/rocksdb/rocksdb/file/read_write_util.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/file/readahead_file_info.h | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/file/readahead_raf.cc | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/file/readahead_raf.h | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/file/sequence_file_reader.cc | 66
mariadb-10.11.13/storage/rocksdb/rocksdb/file/sequence_file_reader.h | 84
mariadb-10.11.13/storage/rocksdb/rocksdb/file/sst_file_manager_impl.cc | 104
mariadb-10.11.13/storage/rocksdb/rocksdb/file/sst_file_manager_impl.h | 57
mariadb-10.11.13/storage/rocksdb/rocksdb/file/writable_file_writer.cc | 599 -
mariadb-10.11.13/storage/rocksdb/rocksdb/file/writable_file_writer.h | 179
mariadb-10.11.13/storage/rocksdb/rocksdb/fuzz/Makefile | 61
mariadb-10.11.13/storage/rocksdb/rocksdb/fuzz/README.md | 160
mariadb-10.11.13/storage/rocksdb/rocksdb/fuzz/db_fuzzer.cc | 164
mariadb-10.11.13/storage/rocksdb/rocksdb/fuzz/db_map_fuzzer.cc | 107
mariadb-10.11.13/storage/rocksdb/rocksdb/fuzz/proto/db_operation.proto | 28
mariadb-10.11.13/storage/rocksdb/rocksdb/fuzz/sst_file_writer_fuzzer.cc | 185
mariadb-10.11.13/storage/rocksdb/rocksdb/fuzz/util.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/hdfs/env_hdfs.h | 66
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/advanced_options.h | 286
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/c.h | 618 +
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/cache.h | 247
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/cleanable.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/compaction_filter.h | 146
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/comparator.h | 41
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/compression_type.h | 40
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/configurable.h | 397
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/convenience.h | 199
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/customizable.h | 233
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/data_structure.h | 51
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/db.h | 420
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/env.h | 465
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/env_encryption.h | 465
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/file_checksum.h | 92
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/file_system.h | 423
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/filter_policy.h | 150
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h | 56
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/io_status.h | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/iostats_context.h | 38
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/iterator.h | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/listener.h | 342
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/memory_allocator.h | 42
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/memtablerep.h | 65
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/merge_operator.h | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/metadata.h | 278
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/options.h | 534
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/perf_context.h | 25
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/persistent_cache.h | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/rate_limiter.h | 41
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/secondary_cache.h | 85
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/slice_transform.h | 40
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h | 28
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h | 142
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/statistics.h | 196
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/stats_history.h | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/status.h | 222
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/system_clock.h | 116
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/table.h | 308
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/table_properties.h | 98
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/thread_status.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/trace_record.h | 247
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/trace_record_result.h | 187
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/transaction_log.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/types.h | 58
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/unique_id.h | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/universal_compaction.h | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h | 616 +
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h | 335
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h | 142
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h | 368
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/leveldb_options.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h | 473
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h | 946 +
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/regex.h | 48
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h | 87
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h | 62
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h | 58
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h | 90
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h | 145
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/version.h | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/wal_filter.h | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/write_batch.h | 98
mariadb-10.11.13/storage/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h | 144
mariadb-10.11.13/storage/rocksdb/rocksdb/java/CMakeLists.txt | 66
mariadb-10.11.13/storage/rocksdb/rocksdb/java/Makefile | 213
mariadb-10.11.13/storage/rocksdb/rocksdb/java/crossbuild/docker-build-linux-alpine.sh | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/java/crossbuild/docker-build-linux-centos.sh | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/java/jmh/README.md | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/java/jmh/pom.xml | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java | 100
mariadb-10.11.13/storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/java/pom.xml.template | 178
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni.pom | 150
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/cache.cc | 35
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/compression_options.cc | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/concurrent_task_limiter.cc | 90
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/config_options.cc | 88
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/event_listener.cc | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/event_listener_jnicallback.cc | 502
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/event_listener_jnicallback.h | 122
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/iterator.cc | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/jnicallback.cc | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/jnicallback.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/memory_util.cc | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/merge_operator.cc | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/options.cc | 1388 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/options_util.cc | 64
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/portal.h | 1215 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/rocks_callback_object.cc | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/rocksjni.cc | 735 +
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/slice.cc | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/sst_file_reader_iterator.cc | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/sst_partitioner.cc | 42
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/statisticsjni.cc | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/statisticsjni.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/table.cc | 25
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/testable_event_listener.cc | 216
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/transaction.cc | 98
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/ttl.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/write_batch.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/write_batch_test.cc | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/write_batch_with_index.cc | 34
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/write_buffer_manager.cc | 7
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/writebatchhandlerjnicallback.cc | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/java/rocksjni/writebatchhandlerjnicallback.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/samples/src/main/java/OptimisticTransactionSample.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/samples/src/main/java/RocksDBColumnFamilySample.java | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/java/samples/src/main/java/RocksDBSample.java | 31
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractEventListener.java | 334
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractMutableOptions.java | 148
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractNativeReference.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractRocksIterator.java | 7
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractWriteBatch.java | 56
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java | 254
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BackgroundErrorReason.java | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java | 167
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Cache.java | 27
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ChecksumType.java | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java | 40
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java | 448
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java | 87
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactRangeOptions.java | 7
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionJobInfo.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionReason.java | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompressionType.java | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ConcurrentTaskLimiter.java | 38
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java | 42
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ConfigOptions.java | 47
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DBOptions.java | 227
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DBOptionsInterface.java | 295
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DirectSlice.java | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Env.java | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/EventListener.java | 335
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ExternalFileIngestionInfo.java | 103
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/FileOperationInfo.java | 112
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/FlushJobInfo.java | 186
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/FlushReason.java | 53
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/HistogramType.java | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/IndexShorteningMode.java | 60
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/IndexType.java | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/KeyMayExist.java | 36
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MemTableInfo.java | 103
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java | 157
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableDBOptions.java | 35
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java | 45
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableOptionValue.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/NativeLibraryLoader.java | 109
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/OptionString.java | 256
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Options.java | 387
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/OptionsUtil.java | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ReadOptions.java | 271
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksCallbackObject.java | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksDB.java | 805 +
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksIterator.java | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksIteratorInterface.java | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksObject.java | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SanityLevel.java | 41
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstFileReaderIterator.java | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstFileWriter.java | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstPartitionerFactory.java | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java | 19
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Status.java | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/StringAppendOperator.java | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableFileCreationBriefInfo.java | 107
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableFileCreationInfo.java | 86
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableFileCreationReason.java | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableFileDeletionInfo.java | 86
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableProperties.java | 112
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TickerType.java | 74
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TraceOptions.java | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Transaction.java | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionDB.java | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionalDB.java | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TtlDB.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WBWIRocksIterator.java | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBatch.java | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBatchInterface.java | 92
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java | 63
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBufferManager.java | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteOptions.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteStallCondition.java | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteStallInfo.java | 75
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/util/Environment.java | 30
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/BlobOptionsTest.java | 313
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java | 126
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java | 85
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ColumnFamilyTest.java | 513
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java | 50
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/DBOptionsTest.java | 130
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/EventListenerTest.java | 763 +
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/KeyMayExistTest.java | 654 -
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/LRUCacheTest.java | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MemoryUtilTest.java | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MergeTest.java | 45
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MultiGetManyKeysTest.java | 70
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MultiGetTest.java | 525
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java | 94
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MutableOptionsGetSetTest.java | 397
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/OptionsTest.java | 188
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ReadOnlyTest.java | 191
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ReadOptionsTest.java | 62
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RocksDBTest.java | 87
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RocksIteratorTest.java | 21
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/SecondaryDBTest.java | 135
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/SstPartitionerTest.java | 72
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TransactionTest.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WriteBatchTest.java | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java | 193
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/test/TestableEventListener.java | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java | 28
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/EnvironmentTest.java | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/TestUtil.java | 19
mariadb-10.11.13/storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/java/understanding_options.md | 79
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/auto_roll_logger.cc | 79
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/auto_roll_logger.h | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/auto_roll_logger_test.cc | 154
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/env_logger.h | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/env_logger_test.cc | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/event_logger.cc | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/logging.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/logging/posix_logger.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/arena.cc | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/arena.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/concurrent_arena.h | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc | 281
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/memkind_kmem_allocator.h | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/memory_allocator.cc | 91
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/memory_allocator_test.cc | 243
mariadb-10.11.13/storage/rocksdb/rocksdb/memory/memory_usage.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/hash_linklist_rep.cc | 83
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/hash_linklist_rep.h | 49
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc | 58
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/hash_skiplist_rep.h | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/inlineskiplist.h | 53
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/inlineskiplist_test.cc | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/memtablerep_bench.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/skiplist_test.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/skiplistrep.cc | 87
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/vectorrep.cc | 30
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/write_buffer_manager.cc | 225
mariadb-10.11.13/storage/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc | 236
mariadb-10.11.13/storage/rocksdb/rocksdb/microbench/CMakeLists.txt | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/microbench/db_basic_bench.cc | 134
mariadb-10.11.13/storage/rocksdb/rocksdb/microbench/ribbon_bench.cc | 156
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/histogram.cc | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/histogram.h | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/histogram_test.cc | 38
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/histogram_windowing.cc | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/histogram_windowing.h | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/instrumented_mutex.cc | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/instrumented_mutex.h | 42
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/iostats_context.cc | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/iostats_context_imp.h | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/perf_context.cc | 40
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/perf_context_imp.h | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/perf_step_timer.h | 36
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/persistent_stats_history.cc | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/statistics.cc | 116
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/statistics.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/statistics_test.cc | 49
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/stats_history_test.cc | 396
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/thread_status_updater.cc | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/thread_status_util.cc | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/options/cf_options.cc | 935 +
mariadb-10.11.13/storage/rocksdb/rocksdb/options/cf_options.h | 145
mariadb-10.11.13/storage/rocksdb/rocksdb/options/configurable.cc | 785 +
mariadb-10.11.13/storage/rocksdb/rocksdb/options/configurable_helper.h | 187
mariadb-10.11.13/storage/rocksdb/rocksdb/options/configurable_test.cc | 880 +
mariadb-10.11.13/storage/rocksdb/rocksdb/options/configurable_test.h | 126
mariadb-10.11.13/storage/rocksdb/rocksdb/options/customizable.cc | 137
mariadb-10.11.13/storage/rocksdb/rocksdb/options/customizable_test.cc | 2132 +++
mariadb-10.11.13/storage/rocksdb/rocksdb/options/db_options.cc | 785 +
mariadb-10.11.13/storage/rocksdb/rocksdb/options/db_options.h | 53
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options.cc | 91
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_helper.cc | 2621 +---
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_helper.h | 204
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_parser.cc | 559 -
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_parser.h | 66
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_sanity_check.cc | 38
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_sanity_check.h | 50
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_settable_test.cc | 189
mariadb-10.11.13/storage/rocksdb/rocksdb/options/options_test.cc | 3434 +++++-
mariadb-10.11.13/storage/rocksdb/rocksdb/plugin/README.md | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/port/jemalloc_helper.h | 65
mariadb-10.11.13/storage/rocksdb/rocksdb/port/lang.h | 64
mariadb-10.11.13/storage/rocksdb/rocksdb/port/port_example.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/port/port_posix.cc | 67
mariadb-10.11.13/storage/rocksdb/rocksdb/port/port_posix.h | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/port/stack_trace.cc | 75
mariadb-10.11.13/storage/rocksdb/rocksdb/port/stack_trace.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/port/sys_time.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/env_default.cc | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/env_win.cc | 1027 -
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/env_win.h | 413
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/io_win.cc | 603 -
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/io_win.h | 270
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/port_win.cc | 126
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/port_win.h | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/win_jemalloc.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/win_logger.cc | 36
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/win_logger.h | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/win_thread.cc | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/win_thread.h | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/xpress_win.cc | 19
mariadb-10.11.13/storage/rocksdb/rocksdb/port/win/xpress_win.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/src.mk | 239
mariadb-10.11.13/storage/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc | 27
mariadb-10.11.13/storage/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc | 73
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/binary_search_index_reader.h | 48
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block.cc | 499
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block.h | 339
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_filter_block.h | 19
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc | 1682 ++-
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_builder.h | 90
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc | 754 -
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_factory.h | 152
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc | 382
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h | 273
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc | 3001 +----
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_reader.h | 503
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h | 163
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc | 357
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_builder.cc | 74
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_builder.h | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_like_traits.h | 225
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_prefetcher.cc | 100
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_prefetcher.h | 66
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_test.cc | 67
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/block_type.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/cachable_entry.h | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc | 75
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/filter_block.h | 51
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/filter_policy.cc | 1057 +
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/filter_policy_internal.h | 117
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/flush_block_policy.cc | 64
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/flush_block_policy.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/full_filter_block.cc | 95
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/full_filter_block.h | 34
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc | 25
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/hash_index_reader.cc | 147
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/hash_index_reader.h | 49
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/index_builder.cc | 47
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/index_builder.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/index_reader_common.cc | 55
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/index_reader_common.h | 85
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/mock_block_based_table.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc | 252
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h | 54
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc | 30
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc | 162
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h | 159
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc | 207
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h | 54
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/reader_common.cc | 52
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/reader_common.h | 38
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_fetcher.cc | 257
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_fetcher.h | 69
mariadb-10.11.13/storage/rocksdb/rocksdb/table/block_fetcher_test.cc | 521
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc | 79
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h | 27
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc | 134
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h | 8
mariadb-10.11.13/storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc | 98
mariadb-10.11.13/storage/rocksdb/rocksdb/table/format.cc | 496
mariadb-10.11.13/storage/rocksdb/rocksdb/table/format.h | 261
mariadb-10.11.13/storage/rocksdb/rocksdb/table/get_context.cc | 192
mariadb-10.11.13/storage/rocksdb/rocksdb/table/get_context.h | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/table/internal_iterator.h | 67
mariadb-10.11.13/storage/rocksdb/rocksdb/table/iterator_wrapper.h | 45
mariadb-10.11.13/storage/rocksdb/rocksdb/table/merger_test.cc | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/table/merging_iterator.cc | 51
mariadb-10.11.13/storage/rocksdb/rocksdb/table/merging_iterator.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/table/meta_blocks.cc | 363
mariadb-10.11.13/storage/rocksdb/rocksdb/table/meta_blocks.h | 71
mariadb-10.11.13/storage/rocksdb/rocksdb/table/mock_table.cc | 255
mariadb-10.11.13/storage/rocksdb/rocksdb/table/mock_table.h | 178
mariadb-10.11.13/storage/rocksdb/rocksdb/table/multiget_context.h | 94
mariadb-10.11.13/storage/rocksdb/rocksdb/table/persistent_cache_helper.cc | 56
mariadb-10.11.13/storage/rocksdb/rocksdb/table/persistent_cache_options.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_bloom.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_builder.cc | 179
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_builder.h | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_factory.cc | 340
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_factory.h | 61
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_index.cc | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_index.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc | 58
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_key_coding.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_reader.cc | 51
mariadb-10.11.13/storage/rocksdb/rocksdb/table/plain/plain_table_reader.h | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/table/sst_file_dumper.cc | 502
mariadb-10.11.13/storage/rocksdb/rocksdb/table/sst_file_dumper.h | 97
mariadb-10.11.13/storage/rocksdb/rocksdb/table/sst_file_reader.cc | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/table/sst_file_reader_test.cc | 266
mariadb-10.11.13/storage/rocksdb/rocksdb/table/sst_file_writer.cc | 185
mariadb-10.11.13/storage/rocksdb/rocksdb/table/sst_file_writer_collectors.h | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_builder.h | 139
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_factory.cc | 65
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_properties.cc | 139
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_properties_internal.h | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_reader.h | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_reader_bench.cc | 65
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_reader_caller.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/table/table_test.cc | 2268 ++--
mariadb-10.11.13/storage/rocksdb/rocksdb/table/two_level_iterator.cc | 8
mariadb-10.11.13/storage/rocksdb/rocksdb/table/unique_id.cc | 166
mariadb-10.11.13/storage/rocksdb/rocksdb/table/unique_id_impl.h | 59
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/fault_injection_test_env.cc | 437
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/fault_injection_test_env.h | 225
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/mock_time_env.cc | 38
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/mock_time_env.h | 70
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/sync_point.cc | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/sync_point.h | 70
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/sync_point_impl.cc | 44
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/sync_point_impl.h | 35
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/testharness.cc | 58
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/testharness.h | 71
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/testutil.cc | 464
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/testutil.h | 677 -
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/testutil_test.cc | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/test_util/transaction_test_util.cc | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/folly/folly/Portability.h | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp | 42
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h | 118
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/gcc/ppc-asm.h | 390
mariadb-10.11.13/storage/rocksdb/rocksdb/third-party/gtest-1.8.1/fused-src/gtest/CMakeLists.txt | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/CMakeLists.txt | 35
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/advisor/README.md | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/backup_db.sh | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/benchmark.sh | 343
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/blob_dump.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc | 18
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/check_all_python.py | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/check_format_compatible.sh | 361
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/db_bench.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/db_bench_tool.cc | 2086 +++
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/db_bench_tool_test.cc | 98
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/db_crashtest.py | 442
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/db_repl_stress.cc | 121
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/io_tracer_parser.cc | 25
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/io_tracer_parser_test.cc | 189
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc | 144
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/io_tracer_parser_tool.h | 40
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/ldb_cmd.cc | 932 +
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/ldb_cmd_impl.h | 78
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/ldb_cmd_test.cc | 539
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/ldb_test.py | 208
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/ldb_tool.cc | 37
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/rdb/db_wrapper.cc | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/rdb/db_wrapper.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/rdb/rdb.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/reduce_levels_test.cc | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/regression_test.sh | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/report_lite_binary_size.sh | 42
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/restore_db.sh | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/run_blob_bench.sh | 195
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc | 246
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h | 126
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/sst_dump.cc | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/sst_dump_test.cc | 245
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/sst_dump_tool.cc | 660 -
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/sst_dump_tool_imp.h | 87
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/trace_analyzer_test.cc | 279
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/trace_analyzer_tool.cc | 585 -
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/trace_analyzer_tool.h | 142
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/write_external_sst.sh | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/write_stress.cc | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/tools/write_stress_runner.py | 8
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc | 22
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/block_cache_tracer.h | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc | 36
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/io_tracer.cc | 303
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/io_tracer.h | 185
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/io_tracer_test.cc | 352
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/trace_record.cc | 206
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/trace_record_handler.cc | 190
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/trace_record_handler.h | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/trace_record_result.cc | 146
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/trace_replay.cc | 817 -
mariadb-10.11.13/storage/rocksdb/rocksdb/trace_replay/trace_replay.h | 172
mariadb-10.11.13/storage/rocksdb/rocksdb/util/aligned_buffer.h | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/util/autovector.h | 40
mariadb-10.11.13/storage/rocksdb/rocksdb/util/autovector_test.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/bloom_impl.h | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/util/bloom_test.cc | 724 +
mariadb-10.11.13/storage/rocksdb/rocksdb/util/build_version.cc.in | 74
mariadb-10.11.13/storage/rocksdb/rocksdb/util/build_version.h | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/util/cast_util.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/util/channel.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/coding.h | 107
mariadb-10.11.13/storage/rocksdb/rocksdb/util/coding_lean.h | 101
mariadb-10.11.13/storage/rocksdb/rocksdb/util/coding_test.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/util/compaction_job_stats_impl.cc | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/util/comparator.cc | 90
mariadb-10.11.13/storage/rocksdb/rocksdb/util/compression.h | 221
mariadb-10.11.13/storage/rocksdb/rocksdb/util/compression_context_cache.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c.cc | 221
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c_arm64.cc | 146
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c_arm64.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c_ppc.c | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c_ppc.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c_ppc_asm.S | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/util/crc32c_test.cc | 47
mariadb-10.11.13/storage/rocksdb/rocksdb/util/defer.h | 31
mariadb-10.11.13/storage/rocksdb/rocksdb/util/defer_test.cc | 11
mariadb-10.11.13/storage/rocksdb/rocksdb/util/duplicate_detector.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/util/dynamic_bloom.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/util/dynamic_bloom_test.cc | 10
mariadb-10.11.13/storage/rocksdb/rocksdb/util/fastrange.h | 114
mariadb-10.11.13/storage/rocksdb/rocksdb/util/file_checksum_helper.cc | 95
mariadb-10.11.13/storage/rocksdb/rocksdb/util/file_checksum_helper.h | 95
mariadb-10.11.13/storage/rocksdb/rocksdb/util/file_reader_writer_test.cc | 613 -
mariadb-10.11.13/storage/rocksdb/rocksdb/util/filelock_test.cc | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/util/filter_bench.cc | 94
mariadb-10.11.13/storage/rocksdb/rocksdb/util/gflags_compat.h | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/util/hash.cc | 128
mariadb-10.11.13/storage/rocksdb/rocksdb/util/hash.h | 103
mariadb-10.11.13/storage/rocksdb/rocksdb/util/hash128.h | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/util/hash_map.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/hash_test.cc | 545 -
mariadb-10.11.13/storage/rocksdb/rocksdb/util/heap.h | 7
mariadb-10.11.13/storage/rocksdb/rocksdb/util/kv_map.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/log_write_bench.cc | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/util/math.h | 242
mariadb-10.11.13/storage/rocksdb/rocksdb/util/math128.h | 310
mariadb-10.11.13/storage/rocksdb/rocksdb/util/murmurhash.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/murmurhash.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/util/mutexlock.h | 51
mariadb-10.11.13/storage/rocksdb/rocksdb/util/random.cc | 27
mariadb-10.11.13/storage/rocksdb/rocksdb/util/random.h | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/util/rate_limiter.cc | 439
mariadb-10.11.13/storage/rocksdb/rocksdb/util/rate_limiter.h | 86
mariadb-10.11.13/storage/rocksdb/rocksdb/util/rate_limiter_test.cc | 386
mariadb-10.11.13/storage/rocksdb/rocksdb/util/regex.cc | 50
mariadb-10.11.13/storage/rocksdb/rocksdb/util/repeatable_thread.h | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/util/repeatable_thread_test.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/util/ribbon_alg.h | 1225 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/util/ribbon_config.cc | 506
mariadb-10.11.13/storage/rocksdb/rocksdb/util/ribbon_config.h | 182
mariadb-10.11.13/storage/rocksdb/rocksdb/util/ribbon_impl.h | 1137 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/util/ribbon_test.cc | 1308 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/util/set_comparator.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/slice.cc | 223
mariadb-10.11.13/storage/rocksdb/rocksdb/util/slice_test.cc | 54
mariadb-10.11.13/storage/rocksdb/rocksdb/util/status.cc | 37
mariadb-10.11.13/storage/rocksdb/rocksdb/util/stop_watch.h | 36
mariadb-10.11.13/storage/rocksdb/rocksdb/util/string_util.cc | 106
mariadb-10.11.13/storage/rocksdb/rocksdb/util/string_util.h | 55
mariadb-10.11.13/storage/rocksdb/rocksdb/util/thread_guard.h | 41
mariadb-10.11.13/storage/rocksdb/rocksdb/util/thread_list_test.cc | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/util/thread_local.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/thread_local.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/util/thread_local_test.cc | 51
mariadb-10.11.13/storage/rocksdb/rocksdb/util/threadpool_imp.cc | 72
mariadb-10.11.13/storage/rocksdb/rocksdb/util/threadpool_imp.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/util/timer.h | 331
mariadb-10.11.13/storage/rocksdb/rocksdb/util/timer_test.cc | 402
mariadb-10.11.13/storage/rocksdb/rocksdb/util/user_comparator_wrapper.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/util/util.h | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/util/vector_iterator.h | 46
mariadb-10.11.13/storage/rocksdb/rocksdb/util/work_queue.h | 150
mariadb-10.11.13/storage/rocksdb/rocksdb/util/work_queue_test.cc | 268
mariadb-10.11.13/storage/rocksdb/rocksdb/util/xxh3p.h | 1648 ---
mariadb-10.11.13/storage/rocksdb/rocksdb/util/xxhash.cc | 1181 --
mariadb-10.11.13/storage/rocksdb/rocksdb/util/xxhash.h | 5444 +++++++++
mariadb-10.11.13/storage/rocksdb/rocksdb/util/xxph3.h | 1762 +++
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/backupable/backupable_db.cc | 2543 +++-
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/backupable/backupable_db_impl.h | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/backupable/backupable_db_test.cc | 2557 ++++
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc | 409
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h | 130
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db.cc | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc | 268
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h | 34
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc | 567 -
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc | 28
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h | 3
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_file.cc | 106
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_file.h | 30
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_log_format.cc | 149
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_log_format.h | 133
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_log_reader.cc | 105
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_log_reader.h | 82
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_log_writer.cc | 139
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/blob_db/blob_log_writer.h | 94
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cache_dump_load.cc | 69
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc | 489
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cache_dump_load_impl.h | 365
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc | 73
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h | 27
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc | 28
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc | 206
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc | 22
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc | 1
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/format.h | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/merge_operator.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/serialize.h | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/test_utils.cc | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/cassandra/test_utils.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc | 340
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h | 34
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc | 173
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/compaction_filters.cc | 56
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h | 41
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/debug.cc | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/env_librados.cc | 104
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/env_librados.md | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/env_librados_test.cc | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/env_mirror.cc | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/env_timed.cc | 286
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/env_timed.h | 97
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/env_timed_test.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/fault_injection_env.cc | 548 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/fault_injection_env.h | 258
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/fault_injection_fs.cc | 994 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/fault_injection_fs.h | 582 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc | 110
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h | 94
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/memory/memory_test.cc | 50
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/memory_allocators.h | 104
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators.cc | 120
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators.h | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/bytesxor.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h | 8
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/max.cc | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/put.cc | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/sortlist.h | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc | 252
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc | 24
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/object_registry.cc | 227
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/object_registry_test.cc | 619 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc | 87
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/options/options_util.cc | 83
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/options/options_util_test.cc | 519
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc | 5
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h | 19
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h | 8
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc | 17
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc | 98
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h | 9
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h | 12
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc | 6
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc | 32
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc | 2
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc | 84
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc | 57
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc | 199
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h | 14
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc | 138
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc | 43
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/trace/replayer_impl.cc | 316
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/trace/replayer_impl.h | 86
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc | 29
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.h | 82
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/lock_tracker.h | 209
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc | 718 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h | 223
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc | 181
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h | 319
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.cc | 270
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.h | 99
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h | 30
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc | 422
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.AGPLv3 | 661 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.APACHEv2 | 174
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/COPYING.GPLv2 | 339
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/db.h | 76
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/comparator.h | 138
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/ft/ft-status.h | 102
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc | 139
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.h | 174
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc | 222
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.h | 141
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc | 525
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h | 253
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc | 1024 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h | 580 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc | 527
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc | 265
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.h | 178
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc | 520
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.h | 302
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc | 120
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.h | 92
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc | 213
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.h | 124
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/memory.h | 215
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h | 39
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_atomic.h | 130
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h | 83
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_instrumentation.h | 286
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_portability.h | 87
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_pthread.h | 520
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_race_tools.h | 179
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h | 176
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h | 27
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc | 132
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc | 153
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.h | 98
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h | 144
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc | 201
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.h | 141
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt.h | 794 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/omt_impl.h | 1295 ++
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h | 165
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/status.h | 76
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc | 503
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h | 137
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc | 156
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h | 146
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc | 33
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h | 16
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc | 672 -
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc | 200
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h | 15
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc | 107
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h | 20
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_base.cc | 267
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_base.h | 56
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_lock_mgr.cc | 745 -
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_lock_mgr.h | 158
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_test.cc | 568 -
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_test.h | 45
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_util.cc | 70
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/transaction_util.h | 52
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc | 772 +
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc | 83
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc | 78
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h | 13
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc | 105
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc | 143
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h | 4
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc | 26
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h | 40
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc | 375
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h | 231
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/ttl/ttl_test.cc | 276
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/wal_filter.cc | 23
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc | 669 -
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc | 655 -
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h | 196
mariadb-10.11.13/storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc | 1766 ++-
mariadb-10.11.13/storage/spider/CMakeLists.txt | 11
mariadb-10.11.13/storage/spider/ha_spider.cc | 27
mariadb-10.11.13/storage/spider/mysql-test/spider/bg/t/basic_sql.test | 3
mariadb-10.11.13/storage/spider/mysql-test/spider/bg/t/ha.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bg/t/ha_part.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/include/direct_sql_with_comma_pwd_init.inc | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/include/direct_sql_with_tmp_table_init.inc | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/direct_sql_with_comma_pwd.result | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/direct_sql_with_tmp_table.result | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_26345.result | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_29002.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_29163.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_29502.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_29605.result | 19
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_29962.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_30392.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_30408.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_31338.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_31645.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_34003.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_35807.result | 16
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_35874.result | 51
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/mdev_35959.result | 25
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/subquery.result | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/udf_mysql_func_early.result | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/r/udf_mysql_func_early_init_file.result | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/checksum_table_with_quick_mode_3.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/cp932_column.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/delete_with_float_column.inc | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/group_by_order_by_limit.test | 6
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/insert_select.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_19866.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_20100.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_20502.test | 6
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_21884.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_26345.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test | 8
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_29002.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_29008.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_29163.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_29502.test | 5
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_29605.test | 25
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_29962.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_30392.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_30408.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_30649.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_30727.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_31338.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_31645.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_34003.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_34659.test | 3
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_35807.test | 21
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_35874.test | 53
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/mdev_35959.test | 30
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/quick_mode_0.test | 12
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/quick_mode_1.test | 12
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/quick_mode_2.test | 12
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/quick_mode_3.test | 12
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/return_found_rows_insert.test | 6
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/return_found_rows_update.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/select_by_null.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/select_with_backquote.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/slave_trx_isolation.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/sql_mode.inc | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/strict_group_by.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/subquery.test | 1
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/udf_mysql_func_early.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/wrapper_mariadb.test | 6
mariadb-10.11.13/storage/spider/mysql-test/spider/bugfix/t/xa_cmd.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/feature/r/pushdown_case.result | 12
mariadb-10.11.13/storage/spider/mysql-test/spider/feature/t/checksum_table_parallel.inc | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/feature/t/pushdown_case.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/regression/e1121/t/direct_join_by_pkey_key.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/regression/e1121/t/direct_join_by_pkey_pkey.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/regression/e1121/t/load_data.inc | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/regression/e112122/t/group_by_order_by_limit_ok.test | 6
mariadb-10.11.13/storage/spider/mysql-test/spider/regression/e112122/t/load_data_part.inc | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/t/auto_increment.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/t/checksum_table_with_quick_mode_3.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_join.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_join_using.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_left_join.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_left_join_nullable.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_left_right_join_nullable.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_left_right_left_join_nullable.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_right_join.test | 4
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_right_join_nullable.test | 2
mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_right_left_join_nullable.test | 2 mariadb-10.11.13/storage/spider/mysql-test/spider/t/direct_right_left_right_join_nullable.test | 2 mariadb-10.11.13/storage/spider/mysql-test/spider/t/ha.test | 2 mariadb-10.11.13/storage/spider/mysql-test/spider/t/ha_part.test | 2 mariadb-10.11.13/storage/spider/mysql-test/spider/t/partition_cond_push.test | 6 mariadb-10.11.13/storage/spider/mysql-test/spider/t/partition_fulltext.test | 6 mariadb-10.11.13/storage/spider/mysql-test/spider/t/partition_join_pushdown_for_single_partition.test | 6 mariadb-10.11.13/storage/spider/mysql-test/spider/t/pushdown_not_like.test | 4 mariadb-10.11.13/storage/spider/mysql-test/spider/t/quick_mode_0.test | 10 mariadb-10.11.13/storage/spider/mysql-test/spider/t/quick_mode_1.test | 12 mariadb-10.11.13/storage/spider/mysql-test/spider/t/quick_mode_2.test | 12 mariadb-10.11.13/storage/spider/mysql-test/spider/t/quick_mode_3.test | 12 mariadb-10.11.13/storage/spider/mysql-test/spider/t/slave_trx_isolation.test | 2 mariadb-10.11.13/storage/spider/mysql-test/spider/t/timestamp.test | 28 mariadb-10.11.13/storage/spider/mysql-test/spider/t/udf_pushdown.inc | 4 mariadb-10.11.13/storage/spider/spd_db_conn.cc | 39 mariadb-10.11.13/storage/spider/spd_db_include.h | 4 mariadb-10.11.13/storage/spider/spd_db_mysql.cc | 4 mariadb-10.11.13/storage/spider/spd_direct_sql.cc | 4 mariadb-10.11.13/storage/spider/spd_group_by_handler.cc | 10 mariadb-10.11.13/storage/spider/spd_table.cc | 4 mariadb-10.11.13/storage/spider/spd_trx.cc | 219 mariadb-10.11.13/storage/spider/spd_trx.h | 5 mariadb-10.11.13/strings/ctype-bin.c | 2 mariadb-10.11.13/strings/ctype-latin1.c | 3 mariadb-10.11.13/strings/ctype-mb.c | 2 mariadb-10.11.13/strings/ctype-simple.c | 2 mariadb-10.11.13/strings/ctype-uca.inl | 2 mariadb-10.11.13/strings/ctype-ucs2.c | 10 mariadb-10.11.13/strings/ctype-utf8.c | 4 mariadb-10.11.13/strings/json_lib.c | 10 mariadb-10.11.13/strings/strings_def.h | 2 mariadb-10.11.13/support-files/mariadb.service.in | 8 mariadb-10.11.13/support-files/mariadb@.service.in | 8 mariadb-10.11.13/support-files/rpm/server-prein.sh | 23 mariadb-10.11.13/tests/mysql_client_fw.c | 4 mariadb-10.11.13/tests/mysql_client_test.c | 210 mariadb-10.11.13/tpool/aio_liburing.cc | 10 mariadb-10.11.13/tpool/tpool_generic.cc | 1 mariadb-10.11.13/win/packaging/ca/CMakeLists.txt | 5 mariadb-10.11.13/win/upgrade_wizard/CMakeLists.txt | 20 mariadb-10.11.13/wsrep-lib/.github/workflows/cmake.yml | 71 mariadb-10.11.13/wsrep-lib/.gitignore | 3 mariadb-10.11.13/wsrep-lib/CMakeLists.txt | 2 mariadb-10.11.13/wsrep-lib/CONTRIBUTORS.txt | 1 mariadb-10.11.13/wsrep-lib/cmake/boost.cmake | 2 mariadb-10.11.13/wsrep-lib/include/wsrep/client_state.hpp | 6 mariadb-10.11.13/wsrep-lib/include/wsrep/connection_monitor_service.hpp | 71 mariadb-10.11.13/wsrep-lib/include/wsrep/id.hpp | 5 mariadb-10.11.13/wsrep-lib/include/wsrep/provider.hpp | 26 mariadb-10.11.13/wsrep-lib/include/wsrep/seqno.hpp | 5 mariadb-10.11.13/wsrep-lib/include/wsrep/server_state.hpp | 44 mariadb-10.11.13/wsrep-lib/include/wsrep/storage_service.hpp | 11 mariadb-10.11.13/wsrep-lib/include/wsrep/transaction.hpp | 6 mariadb-10.11.13/wsrep-lib/include/wsrep/view.hpp | 4 mariadb-10.11.13/wsrep-lib/src/CMakeLists.txt | 1 mariadb-10.11.13/wsrep-lib/src/client_state.cpp | 16 mariadb-10.11.13/wsrep-lib/src/config_service_v1.cpp | 5 mariadb-10.11.13/wsrep-lib/src/connection_monitor_service_v1.cpp | 142 mariadb-10.11.13/wsrep-lib/src/connection_monitor_service_v1.hpp | 56 
mariadb-10.11.13/wsrep-lib/src/id.cpp | 34 mariadb-10.11.13/wsrep-lib/src/provider.cpp | 7 mariadb-10.11.13/wsrep-lib/src/server_state.cpp | 47 mariadb-10.11.13/wsrep-lib/src/transaction.cpp | 21 mariadb-10.11.13/wsrep-lib/src/view.cpp | 2 mariadb-10.11.13/wsrep-lib/src/wsrep_provider_v26.cpp | 33 mariadb-10.11.13/wsrep-lib/src/wsrep_provider_v26.hpp | 2 mariadb-10.11.13/wsrep-lib/test/id_test.cpp | 54 mariadb-10.11.13/wsrep-lib/test/mock_provider.hpp | 22 mariadb-10.11.13/wsrep-lib/test/mock_server_state.hpp | 29 mariadb-10.11.13/wsrep-lib/test/test_utils.cpp | 16 mariadb-10.11.13/wsrep-lib/test/test_utils.hpp | 2 mariadb-10.11.13/wsrep-lib/test/transaction_test.cpp | 4 mariadb-10.11.13/wsrep-lib/test/transaction_test_2pc.cpp | 45 mariadb-10.11.13/wsrep-lib/test/transaction_test_xa.cpp | 29 mariadb-10.11.13/wsrep-lib/wsrep-API/v26/CONTRIBUTORS.txt | 1 mariadb-10.11.13/wsrep-lib/wsrep-API/v26/wsrep_connection_monitor_service.h | 134 2471 files changed, 239030 insertions(+), 65503 deletions(-) diff -Nru mariadb-10.11.11/CMakeLists.txt mariadb-10.11.13/CMakeLists.txt --- mariadb-10.11.11/CMakeLists.txt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/CMakeLists.txt 2025-05-19 16:14:23.000000000 +0000 @@ -14,7 +14,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA -CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12) +CMAKE_MINIMUM_REQUIRED(VERSION 2.8...3.12) IF(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) # Setting build type to RelWithDebInfo as none was specified. @@ -31,7 +31,7 @@ # in RPM's: #set(CPACK_RPM_SPEC_MORE_DEFINE "%define __spec_install_post /bin/true") -FOREACH(p CMP0022 CMP0046 CMP0040 CMP0048 CMP0054 CMP0067 CMP0074 CMP0075 CMP0069 CMP0135) +FOREACH(p CMP0022 CMP0046 CMP0040 CMP0048 CMP0054 CMP0056 CMP0067 CMP0074 CMP0075 CMP0069 CMP0135 CMP0091) IF(POLICY ${p}) CMAKE_POLICY(SET ${p} NEW) ENDIF() @@ -246,7 +246,7 @@ OPTION(WITH_MSAN "Enable memory sanitizer" OFF) IF (WITH_MSAN) - MY_CHECK_AND_SET_COMPILER_FLAG("-fsanitize=memory -fsanitize-memory-track-origins -U_FORTIFY_SOURCE" DEBUG RELWITHDEBINFO) + MY_CHECK_AND_SET_COMPILER_FLAG("-fsanitize=memory -fsanitize-memory-track-origins -U_FORTIFY_SOURCE") IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)) MESSAGE(FATAL_ERROR "Compiler doesn't support -fsanitize=memory flags") @@ -256,7 +256,7 @@ MESSAGE(FATAL_ERROR "C++ Compiler requires support for -stdlib=libc++") ENDIF() SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") - MY_CHECK_AND_SET_LINKER_FLAG("-fsanitize=memory" DEBUG RELWITHDEBINFO) + MY_CHECK_AND_SET_LINKER_FLAG("-fsanitize=memory") IF(NOT HAVE_LINK_FLAG__fsanitize_memory) MESSAGE(FATAL_ERROR "Linker doesn't support -fsanitize=memory flags") ENDIF() @@ -633,7 +633,7 @@ perror replace) IF(WIN32) - ADD_DEPENDENCIES(minbuild echo mariadb-install-db my_safe_kill) + ADD_DEPENDENCIES(minbuild echo mariadb-install-db my_safe_kill mariadb-upgrade-service) ENDIF() ADD_CUSTOM_TARGET(smoketest COMMAND perl ./mysql-test-run.pl main.1st diff -Nru mariadb-10.11.11/Docs/INFO_SRC mariadb-10.11.13/Docs/INFO_SRC --- mariadb-10.11.11/Docs/INFO_SRC 2025-01-30 11:01:27.000000000 +0000 +++ mariadb-10.11.13/Docs/INFO_SRC 2025-05-19 16:14:28.000000000 +0000 @@ -1,8 +1,8 @@ -commit: e69f8cae1a15e15b9e4f5e0f8497e1f17bdc81a4 -date: 2025-01-30 11:55:13 +0100 -build-date: 2025-01-30 11:01:27 +0000 -short: e69f8cae1a1 +commit: 
8fb09426b98583916ccfd4f8c49741adc115bac3 +date: 2025-05-13 12:27:50 +0300 +build-date: 2025-05-19 16:14:28 +0000 +short: 8fb09426b98 branch: HEAD -MariaDB source 10.11.11 +MariaDB source 10.11.13 diff -Nru mariadb-10.11.11/VERSION mariadb-10.11.13/VERSION --- mariadb-10.11.11/VERSION 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/VERSION 2025-05-19 16:14:23.000000000 +0000 @@ -1,4 +1,4 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=11 -MYSQL_VERSION_PATCH=11 +MYSQL_VERSION_PATCH=13 SERVER_MATURITY=stable diff -Nru mariadb-10.11.11/appveyor.yml mariadb-10.11.13/appveyor.yml --- mariadb-10.11.11/appveyor.yml 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/appveyor.yml 2025-05-19 16:14:23.000000000 +0000 @@ -1,6 +1,42 @@ version: build-{build}~branch-{branch} -clone_depth: 1 +clone_depth: 10 + +skip_branch_with_pr: true +before_build: + - ps: | + function Get-Remote-Ref($ref) { + try { + $result = git ls-remote origin $ref 2>$null + if (-not $result) { + "Warning: Could not fetch remote ref '$ref'" + return $null + } + return ($result -split "`t")[0] + } catch { + "Warning: Exception while running git ls-remote for '$ref': $_" + return $null + } + } + Get-ChildItem Env: | Where-Object { $_.Name -like 'APPVEYOR*COMMIT' } | ForEach-Object { "$($_.Name)=$($_.Value)" } + $commit = $env:APPVEYOR_REPO_COMMIT + $commit2 = $env:APPVEYOR_PULL_REQUEST_HEAD_COMMIT + $branch = $env:APPVEYOR_REPO_BRANCH + $latest = $null + $mainBranch = $branch -match '^(main|\d+\.\d+)$' + if ($env:APPVEYOR_PULL_REQUEST_NUMBER -eq $null) { + "Branch build detected" + $latest = Get-Remote-Ref "refs/heads/$branch" + } else { + $pr = $env:APPVEYOR_PULL_REQUEST_NUMBER + $latest = Get-Remote-Ref "refs/pull/$pr/head" + $mainBranch = $False + "Pull Request build detected" + } + if ($latest -and ($commit -ne $latest) -and ($commit2 -ne $latest) -and (-not $mainBranch)) { + "Skipping outdated commit (latest is $latest)" + Exit-AppVeyorBuild + } build_script: # dump some system info diff -Nru mariadb-10.11.11/client/mysql_upgrade.c mariadb-10.11.13/client/mysql_upgrade.c --- mariadb-10.11.11/client/mysql_upgrade.c 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/client/mysql_upgrade.c 2025-05-19 16:14:24.000000000 +0000 @@ -855,8 +855,7 @@ s= strchr(version, '.'); s= strchr(s + 1, '.'); - if (strncmp(upgrade_from_version, version, - (size_t)(s - version + 1))) + if (strncmp(upgrade_from_version, version, (size_t)(s - version + 1))) { if (calc_server_version(upgrade_from_version) <= MYSQL_VERSION_ID) { @@ -870,9 +869,14 @@ } if (!silent) { - verbose("This installation of MariaDB is already upgraded to %s.\n" - "There is no need to run mysql_upgrade again for %s.", - upgrade_from_version, version); + if (strcmp(upgrade_from_version, version)) + verbose("This installation of MariaDB is already upgraded to %s.\n" + "There is no need to run mysql_upgrade again for %s, because " + "they're both %.*s.", + upgrade_from_version, version, (int)(s - version), version); + else + verbose("This installation of MariaDB is already upgraded to %s.\n" + "There is no need to run mysql_upgrade again.", version); if (!opt_check_upgrade) verbose("You can use --force if you still want to run mysql_upgrade"); } diff -Nru mariadb-10.11.11/client/mysqlbinlog.cc mariadb-10.11.13/client/mysqlbinlog.cc --- mariadb-10.11.11/client/mysqlbinlog.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/client/mysqlbinlog.cc 2025-05-19 16:14:24.000000000 +0000 @@ -160,7 +160,13 @@ static char *start_datetime_str, *stop_datetime_str; 
static my_time_t start_datetime= 0, stop_datetime= MY_TIME_T_MAX; -static my_time_t last_processed_datetime= MY_TIME_T_MAX; + +typedef struct _last_processed_ev_t +{ + ulonglong position; + my_time_t datetime; +} last_processed_ev_t; +static last_processed_ev_t last_processed_ev= {0, MY_TIME_T_MAX}; static ulonglong rec_count= 0; static MYSQL* mysql = NULL; @@ -1611,7 +1617,19 @@ end: rec_count++; end_skip_count: - last_processed_datetime= ev_when; + /* + Update the last_processed_ev, unless the event is a fake event (i.e. format + description (ev pointer is reset to 0) or rotate event (ev->when is 0)), or + the event is encrypted (i.e. type is Unknown). + */ + if (ev && + !(ev_type == UNKNOWN_EVENT && + ((Unknown_log_event *) ev)->what == Unknown_log_event::ENCRYPTED) && + !(ev_type == ROTATE_EVENT && !ev->when)) + { + last_processed_ev.position= pos + ev->data_written; + last_processed_ev.datetime= ev_when; + } DBUG_PRINT("info", ("end event processing")); /* @@ -2925,6 +2943,9 @@ if (old_off != BIN_LOG_HEADER_SIZE) *len= 1; // fake event, don't increment old_off } + DBUG_ASSERT(old_off + ev->data_written == old_off + (*len - 1) || + (*len == 1 && + (type == ROTATE_EVENT || type == FORMAT_DESCRIPTION_EVENT))); Exit_status retval= process_event(print_event_info, ev, old_off, logname); if (retval != OK_CONTINUE) DBUG_RETURN(retval); @@ -2943,6 +2964,9 @@ DBUG_RETURN(ERROR_STOP); } + DBUG_ASSERT(old_off + ev->data_written == old_off + (*len - 1) || + (*len == 1 && + (type == ROTATE_EVENT || type == FORMAT_DESCRIPTION_EVENT))); retval= process_event(print_event_info, ev, old_off, logname); if (retval != OK_CONTINUE) { @@ -3342,6 +3366,8 @@ the new one, so we should not do it ourselves in this case. */ + DBUG_ASSERT(tmp_pos + new_description_event->data_written == + my_b_tell(file)); Exit_status retval= process_event(print_event_info, new_description_event, tmp_pos, logname); @@ -3495,20 +3521,17 @@ } // else read_error == 0 means EOF, that's OK, we break in this case - /* - Emit a warning in the event that we finished processing input - before reaching the boundary indicated by --stop-position. - */ - if (((longlong)stop_position != stop_position_default) && - stop_position > my_b_tell(file)) - { - retval = OK_STOP; - warning("Did not reach stop position %llu before " - "end of input", stop_position); - } - goto end; } + + /* + The real location that we have read up to in the file should align with + the size of the event, unless the event is encrypted. + */ + DBUG_ASSERT( + ((ev->get_type_code() == UNKNOWN_EVENT && + ((Unknown_log_event *) ev)->what == Unknown_log_event::ENCRYPTED)) || + old_off + ev->data_written == my_b_tell(file)); if ((retval= process_event(print_event_info, ev, old_off, logname)) != OK_CONTINUE) goto end; @@ -3687,10 +3710,18 @@ start_position= BIN_LOG_HEADER_SIZE; } + /* + Emit a warning if we finished processing input before reaching the stop + boundaries indicated by --stop-datetime or --stop-position. 
+ */ if (stop_datetime != MY_TIME_T_MAX && - stop_datetime > last_processed_datetime) + stop_datetime > last_processed_ev.datetime) warning("Did not reach stop datetime '%s' before end of input", stop_datetime_str); + if ((static_cast<longlong>(stop_position) != stop_position_default) && + stop_position > last_processed_ev.position) + warning("Did not reach stop position %llu before end of input", + stop_position); /* If enable flashback, need to print the events from the end to the diff -Nru mariadb-10.11.11/client/mysqldump.c mariadb-10.11.13/client/mysqldump.c --- mariadb-10.11.11/client/mysqldump.c 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/client/mysqldump.c 2025-05-19 16:14:24.000000000 +0000 @@ -2158,7 +2158,7 @@ *to++='\\'; } if (*name == '\'') - *to++= '\\'; + *to++= '\''; *to++= *name++; } to[0]= '\''; @@ -3713,7 +3713,7 @@ fprintf(sql_file, "DELIMITER ;;\n" - "/*!50003 SET SESSION SQL_MODE=\"%s\" */;;\n" + "/*!50003 SET SESSION SQL_MODE='%s' */;;\n" "/*!50003 CREATE */ ", (*show_trigger_row)[6]); @@ -4730,17 +4730,19 @@ return 1; while ((row= mysql_fetch_row(tableres))) { + char buf[200]; if (opt_replace_into) /* Protection against removing the current import user */ /* MySQL-8.0 export capability */ fprintf(md_result_file, "DELIMITER |\n" - "/*M!100101 IF current_user()=\"%s\" THEN\n" + "/*M!100101 IF current_user()=%s THEN\n" " SIGNAL SQLSTATE '45000' SET MYSQL_ERRNO=30001," " MESSAGE_TEXT=\"Don't remove current user %s'\";\n" "END IF */|\n" "DELIMITER ;\n" - "/*!50701 DROP USER IF EXISTS %s */;\n", row[0], row[0], row[0]); + "/*!50701 DROP USER IF EXISTS %s */;\n", + quote_for_equal(row[0],buf), row[0], row[0]); if (dump_create_user(row[0])) result= 1; /* if roles exist, defer dumping grants until after roles created */ @@ -6858,6 +6860,7 @@ char *result_table, *opt_quoted_table; char table_buff[NAME_LEN*2+3]; char table_buff2[NAME_LEN*2+3]; + char temp_buff[NAME_LEN*2 + 3], temp_buff2[NAME_LEN*2 + 3]; char query[QUERY_LENGTH]; FILE *sql_file= md_result_file; DBUG_ENTER("get_view_structure"); @@ -6918,7 +6921,9 @@ "SELECT CHECK_OPTION, DEFINER, SECURITY_TYPE, " " CHARACTER_SET_CLIENT, COLLATION_CONNECTION " "FROM information_schema.views " - "WHERE table_name=\"%s\" AND table_schema=\"%s\"", table, db); + "WHERE table_name=%s AND table_schema=%s", + quote_for_equal(table, temp_buff2), + quote_for_equal(db, temp_buff)); if (mysql_query(mysql, query)) { diff -Nru mariadb-10.11.11/client/mysqlslap.c mariadb-10.11.13/client/mysqlslap.c --- mariadb-10.11.11/client/mysqlslap.c 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/client/mysqlslap.c 2025-05-19 16:14:24.000000000 +0000 @@ -2237,6 +2237,13 @@ stats *ptr; unsigned int x; + if (eng && eng->string) + con->engine= eng->string; + + /* Early return when iterations is 0 to avoid accessing uninitialized sptr */ + if (iterations == 0) + return; + con->min_timing= sptr->timing; con->max_timing= sptr->timing; con->min_rows= sptr->rows; @@ -2257,11 +2264,6 @@ con->min_timing= ptr->timing; } con->avg_timing= con->avg_timing/iterations; - - if (eng && eng->string) - con->engine= eng->string; - else - con->engine= NULL; } void diff -Nru mariadb-10.11.11/client/mysqltest.cc mariadb-10.11.13/client/mysqltest.cc --- mariadb-10.11.11/client/mysqltest.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/client/mysqltest.cc 2025-05-19 16:14:24.000000000 +0000 @@ -6744,7 +6744,7 @@ my_bool have_slash= FALSE; enum {R_NORMAL, R_Q, R_SLASH_IN_Q, - R_COMMENT, R_LINE_START} state= R_LINE_START; + R_COMMENT, R_LINE_START,
R_CSTYLE_COMMENT} state= R_LINE_START; DBUG_ENTER("read_line"); *p= 0; @@ -6831,9 +6831,23 @@ state= R_Q; } } + else if (c == '*' && last_char == '/') + { + state= R_CSTYLE_COMMENT; + break; + } have_slash= is_escape_char(c, last_quote); break; + case R_CSTYLE_COMMENT: + if (c == '!') + // Got the hint introducer '/*!'. Switch to normal processing of + // next following characters + state= R_NORMAL; + else if (c == '/' && last_char == '*') + state= R_NORMAL; + break; + case R_COMMENT: if (c == '\n') { diff -Nru mariadb-10.11.11/cmake/cpack_rpm.cmake mariadb-10.11.13/cmake/cpack_rpm.cmake --- mariadb-10.11.11/cmake/cpack_rpm.cmake 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/cmake/cpack_rpm.cmake 2025-05-19 16:14:24.000000000 +0000 @@ -245,7 +245,7 @@ "galera-4" "rsync" "grep" "gawk" "iproute" "coreutils" "findutils" "tar") SETA(CPACK_RPM_server_PACKAGE_RECOMMENDS "lsof" "socat" "pv") - SETA(CPACK_RPM_test_PACKAGE_REQUIRES "socat") + SETA(CPACK_RPM_test_PACKAGE_REQUIRES "${CPACK_RPM_PACKAGE_REQUIRES}" "socat") ENDIF() SET(CPACK_RPM_server_PRE_INSTALL_SCRIPT_FILE ${CMAKE_SOURCE_DIR}/support-files/rpm/server-prein.sh) @@ -292,7 +292,7 @@ ALTERNATIVE_NAME("server" "mariadb-server") ALTERNATIVE_NAME("server" "mysql-compat-server") ALTERNATIVE_NAME("test" "mariadb-test") -ELSEIF(RPM MATCHES "(rhel|centos|rocky)[89]") +ELSEIF(RPM MATCHES "(rhel|centos|rocky)") SET(epoch 3:) ALTERNATIVE_NAME("backup" "mariadb-backup") ALTERNATIVE_NAME("client" "mariadb") diff -Nru mariadb-10.11.11/cmake/libfmt.cmake mariadb-10.11.13/cmake/libfmt.cmake --- mariadb-10.11.11/cmake/libfmt.cmake 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/cmake/libfmt.cmake 2025-05-19 16:14:24.000000000 +0000 @@ -28,15 +28,14 @@ IF(WITH_LIBFMT STREQUAL "system" OR WITH_LIBFMT STREQUAL "auto") SET(CMAKE_REQUIRED_INCLUDES ${LIBFMT_INCLUDE_DIR}) CHECK_CXX_SOURCE_RUNS( -"#define FMT_STATIC_THOUSANDS_SEPARATOR ',' - #define FMT_HEADER_ONLY 1 +"#define FMT_HEADER_ONLY 1 #include <fmt/format-inl.h> int main() { using ArgStore= fmt::dynamic_format_arg_store<fmt::format_context>; ArgStore arg_store; int answer= 4321; arg_store.push_back(answer); - return fmt::vformat(\"{:L}\", arg_store).compare(\"4,321\"); + return fmt::vformat(\"{}\", arg_store).compare(\"4321\"); }" HAVE_SYSTEM_LIBFMT) SET(CMAKE_REQUIRED_INCLUDES) ENDIF() diff -Nru mariadb-10.11.11/cmake/os/Windows.cmake mariadb-10.11.13/cmake/os/Windows.cmake --- mariadb-10.11.11/cmake/os/Windows.cmake 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/cmake/os/Windows.cmake 2025-05-19 16:14:24.000000000 +0000 @@ -15,352 +15,212 @@ # This file includes Windows specific hacks, mostly around compiler flags -INCLUDE (CheckCSourceCompiles) -INCLUDE (CheckCXXSourceCompiles) -INCLUDE (CheckStructHasMember) -INCLUDE (CheckLibraryExists) -INCLUDE (CheckFunctionExists) -INCLUDE (CheckCSourceRuns) -INCLUDE (CheckSymbolExists) -INCLUDE (CheckTypeSize) - -IF(MSVC) - IF(CMAKE_CXX_COMPILER_ARCHITECTURE_ID STREQUAL ARM64) - SET(MSVC_ARM64 1) - SET(MSVC_INTEL 0) - ELSE() - SET(MSVC_INTEL 1) - ENDIF() -ENDIF() + +if(MSVC) + if(CMAKE_CXX_COMPILER_ARCHITECTURE_ID STREQUAL ARM64) + set(MSVC_ARM64 1) + set(MSVC_INTEL 0) + else() + set(MSVC_INTEL 1) + endif() + if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) + set(CLANG_CL TRUE) + endif() +endif() # avoid running system checks by using pre-cached check results # system checks are expensive on VS since every tiny program is to be compiled in # a VC solution.
-GET_FILENAME_COMPONENT(_SCRIPT_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) -INCLUDE(${_SCRIPT_DIR}/WindowsCache.cmake) - +get_filename_component(_SCRIPT_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) +include(${_SCRIPT_DIR}/WindowsCache.cmake) # OS display name (version_compile_os etc). -# Used by the test suite to ignore bugs on some platforms, -IF(CMAKE_SIZEOF_VOID_P MATCHES 8) - SET(SYSTEM_TYPE "Win64") -ELSE() - SET(SYSTEM_TYPE "Win32") -ENDIF() - -# Intel compiler is almost Visual C++ -# (same compile flags etc). Set MSVC flag -IF(CMAKE_C_COMPILER MATCHES "icl") - SET(MSVC TRUE) -ENDIF() - -IF(MSVC AND CMAKE_CXX_COMPILER_ID MATCHES Clang) - SET(CLANG_CL TRUE) -ENDIF() - -ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE) -ADD_DEFINITIONS(-D_WIN32_WINNT=0x0A00) -# We do not want the windows.h , or winsvc.h macros min/max -ADD_DEFINITIONS(-DNOMINMAX -DNOSERVICE) -# Speed up build process excluding unused header files -ADD_DEFINITIONS(-DWIN32_LEAN_AND_MEAN) - -# Adjust compiler and linker flags -IF(MINGW AND CMAKE_SIZEOF_VOID_P EQUAL 4) - # mininal architecture flags, i486 enables GCC atomics - ADD_DEFINITIONS(-march=i486) -ENDIF() - -MACRO(ENABLE_SANITIZERS) - IF(NOT MSVC) - MESSAGE(FATAL_ERROR "clang-cl or MSVC necessary to enable asan/ubsan") - ENDIF() - # currently, asan is broken with static CRT. - IF(CLANG_CL AND NOT(MSVC_CRT_TYPE STREQUAL "/MD")) - SET(MSVC_CRT_TYPE "/MD" CACHE INTERNAL "" FORCE) - ENDIF() - IF(CMAKE_SIZEOF_VOID_P EQUAL 4) - SET(ASAN_ARCH i386) - ELSE() - SET(ASAN_ARCH x86_64) - ENDIF() - - # After installation, clang lib directory should be added to PATH - # (e.g C:/Program Files/LLVM/lib/clang/5.0.1/lib/windows) - SET(SANITIZER_LIBS) - SET(SANITIZER_LINK_LIBRARIES) - SET(SANITIZER_COMPILE_FLAGS) - IF(WITH_ASAN) - IF(CLANG_CL) - LIST(APPEND SANITIZER_LIBS - clang_rt.asan_dynamic-${ASAN_ARCH}.lib clang_rt.asan_dynamic_runtime_thunk-${ASAN_ARCH}.lib) - ENDIF() - STRING(APPEND SANITIZER_COMPILE_FLAGS " -fsanitize=address") - ENDIF() - IF(WITH_UBSAN) - STRING(APPEND SANITIZER_COMPILE_FLAGS " -fsanitize=undefined -fno-sanitize=alignment") - ENDIF() - FOREACH(lib ${SANITIZER_LIBS}) - FIND_LIBRARY(${lib}_fullpath ${lib}) - IF(NOT ${lib}_fullpath) - MESSAGE(FATAL_ERROR "Can't enable sanitizer : missing ${lib}") +# Used by the test suite to ignore bugs on some platforms +if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(SYSTEM_TYPE "Win64") +else() + set(SYSTEM_TYPE "Win32") +endif() + +function(find_asan_runtime result_list) + set(${result_list} "" PARENT_SCOPE) + if(CMAKE_C_COMPILER_VERSION) + set(CLANG_VERSION "${CMAKE_C_COMPILER_VERSION}") + else() + return() + endif() + + get_filename_component(CLANG_BIN_DIR "${CMAKE_C_COMPILER}" DIRECTORY) + get_filename_component(LLVM_ROOT "${CLANG_BIN_DIR}" DIRECTORY) + + # Determine target architecture + execute_process( + COMMAND "${CMAKE_C_COMPILER}" --version + OUTPUT_VARIABLE CLANG_VERSION_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + + if(CLANG_VERSION_OUTPUT MATCHES "x86_64") + set(ARCH_SUFFIX "x86_64") + elseif(CLANG_VERSION_OUTPUT MATCHES "i686|i386") + set(ARCH_SUFFIX "i386") + elseif(CLANG_VERSION_OUTPUT MATCHES "aarch64") + set(ARCH_SUFFIX "aarch64") + else() + message(FATAL_ERROR "unknown arch") + endif() + + string(REGEX MATCH "^[0-9]+" CLANG_MAJOR_VERSION "${CMAKE_C_COMPILER_VERSION}") + set(CLANG_VERSION_DIR "${LLVM_ROOT}/lib/clang/${CLANG_MAJOR_VERSION}") + + set(out) + foreach(name clang_rt.asan_dynamic-${ARCH_SUFFIX}.lib + clang_rt.asan_dynamic_runtime_thunk-${ARCH_SUFFIX}.lib) + set(path 
"${CLANG_VERSION_DIR}/lib/windows/${name}") + if(EXISTS "${path}") + list(APPEND out ${path}) + else() + message(FATAL_ERROR "expected library ${path} not found") ENDIF() - LIST(APPEND CMAKE_REQUIRED_LIBRARIES ${${lib}_fullpath}) - STRING(APPEND CMAKE_C_STANDARD_LIBRARIES " \"${${lib}_fullpath}\" ") - STRING(APPEND CMAKE_CXX_STANDARD_LIBRARIES " \"${${lib}_fullpath}\" ") - ENDFOREACH() - STRING(APPEND CMAKE_C_FLAGS ${SANITIZER_COMPILE_FLAGS}) - STRING(APPEND CMAKE_CXX_FLAGS ${SANITIZER_COMPILE_FLAGS}) -ENDMACRO() + endforeach() + set(${result_list} ${out} PARENT_SCOPE) +endfunction() + +macro(enable_sanitizers) + # Remove the runtime checks from the compiler flags + # ASAN does the same thing, in many cases better + foreach(lang C CXX) + foreach(suffix "_DEBUG" "_DEBUG_INIT") + string(REGEX REPLACE "/RTC[1su]" "" CMAKE_${lang}_FLAGS${suffix} "${CMAKE_${lang}_FLAGS${suffix}}") + endforeach() + endforeach() + + if(WITH_ASAN) + add_compile_options($<$:/fsanitize=address>) + endif() + if(WITH_UBSAN) + include(CheckCCompilerFlag) + check_c_compiler_flag(/fsanitize=undefined HAVE_fsanitize_undefined) + if (HAVE_fsanitize_undefined) + add_compile_options($<$:/fsanitize=undefined>) + else() + message(FATAL_ERROR "UBSAN not supported by this compiler yet") + endif() + endif() + if(CLANG_CL) + find_asan_runtime(asan_libs) + foreach(lib ${asan_libs}) + link_libraries(${lib}) + string(APPEND CMAKE_C_STANDARD_LIBRARIES " \"${lib}\"") + string(APPEND CMAKE_CXX_STANDARD_LIBRARIES " \"${lib}\"") + endforeach() + else() + add_link_options(/INCREMENTAL:NO) + endif() +endmacro() -IF(MSVC) - IF(MSVC_VERSION LESS 1920) - MESSAGE(FATAL_ERROR "Visual Studio 2019 or later is required") - ENDIF() +if(MSVC) # Disable mingw based pkg-config found in Strawberry perl - SET(PKG_CONFIG_EXECUTABLE 0 CACHE INTERNAL "") + set(PKG_CONFIG_EXECUTABLE 0 CACHE INTERNAL "") - SET(MSVC_CRT_TYPE /MT CACHE STRING - "Runtime library - specify runtime library for linking (/MT,/MTd,/MD,/MDd)" - ) - SET(VALID_CRT_TYPES /MTd /MDd /MD /MT) - IF (NOT ";${VALID_CRT_TYPES};" MATCHES ";${MSVC_CRT_TYPE};") - MESSAGE(FATAL_ERROR "Invalid value ${MSVC_CRT_TYPE} for MSVC_CRT_TYPE, choose one of /MT,/MTd,/MD,/MDd ") - ENDIF() - - IF(MSVC_CRT_TYPE MATCHES "/MD") - # Dynamic runtime (DLLs), need to install CRT libraries. - SET(CMAKE_INSTALL_SYSTEM_RUNTIME_COMPONENT VCCRT) - SET(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS TRUE) - IF(MSVC_CRT_TYPE STREQUAL "/MDd") - SET (CMAKE_INSTALL_DEBUG_LIBRARIES_ONLY TRUE) - ENDIF() - INCLUDE(InstallRequiredSystemLibraries) - ENDIF() - - IF(WITH_ASAN AND (NOT CLANG_CL)) - SET(DYNAMIC_UCRT_LINK_DEFAULT OFF) - ELSE() - SET(DYNAMIC_UCRT_LINK_DEFAULT ON) - ENDIF() - - OPTION(DYNAMIC_UCRT_LINK "Link Universal CRT dynamically, if MSVC_CRT_TYPE=/MT" ${DYNAMIC_UCRT_LINK_DEFAULT}) - SET(DYNAMIC_UCRT_LINKER_OPTION " /NODEFAULTLIB:libucrt.lib /DEFAULTLIB:ucrt.lib") - - # Enable debug info also in Release build, - # and create PDB to be able to analyze crashes. - FOREACH(type EXE SHARED MODULE) - SET(CMAKE_${type}_LINKER_FLAGS_RELEASE - "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /debug") - SET(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL - "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /debug") - ENDFOREACH() - - # Force runtime libraries - # Compile with /Zi to get debugging information + if(NOT DEFINED CMAKE_MSVC_RUNTIME_LIBRARY) + set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreadedDLL) + endif() + + if(CMAKE_MSVC_RUNTIME_LIBRARY MATCHES "DLL") + # Dynamic runtime (DLLs), need to install CRT libraries. 
+ set(CMAKE_INSTALL_SYSTEM_RUNTIME_COMPONENT VCCRT) + set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS TRUE) + if(CMAKE_MSVC_RUNTIME_LIBRARY STREQUAL "MultiThreadedDebugDLL") + set(CMAKE_INSTALL_DEBUG_LIBRARIES_ONLY TRUE) + endif() + include(InstallRequiredSystemLibraries) + endif() - FOREACH(lang C CXX) - SET(CMAKE_${lang}_FLAGS_RELEASE "${CMAKE_${lang}_FLAGS_RELEASE} /Zi") - ENDFOREACH() - FOREACH(flag - CMAKE_C_FLAGS CMAKE_CXX_FLAGS - CMAKE_C_FLAGS_INIT CMAKE_CXX_FLAGS_INIT - CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_DEBUG_INIT - CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG_INIT - CMAKE_C_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_MINSIZEREL - ) - STRING(REGEX REPLACE "/M[TD][d]?" "${MSVC_CRT_TYPE}" "${flag}" "${${flag}}" ) - STRING(REPLACE "/ZI " "/Zi " "${flag}" "${${flag}}") - IF((NOT "${${flag}}" MATCHES "/Zi") AND (NOT "${${flag}}" MATCHES "/Z7")) - STRING(APPEND ${flag} " /Zi") - ENDIF() - # Remove inlining flags, added by CMake, if any. - # Compiler default is fine. - STRING(REGEX REPLACE "/Ob[0-3]" "" "${flag}" "${${flag}}" ) - ENDFOREACH() - - # Allow to overwrite the inlining flag - SET(MSVC_INLINE "" CACHE STRING - "MSVC Inlining option, either empty, or one of /Ob0,/Ob1,/Ob2,/Ob3") - IF(MSVC_INLINE MATCHES "/Ob[0-3]") - ADD_COMPILE_OPTIONS(${MSVC_INLINE}) - ELSEIF(NOT(MSVC_INLINE STREQUAL "")) - MESSAGE(FATAL_ERROR "Invalid option for MSVC_INLINE") - ENDIF() + # Compile with /Zi to get debugging information + if (NOT DEFINED CMAKE_MSVC_DEBUG_INFORMATION_FORMAT) + set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "ProgramDatabase") + add_link_options(/DEBUG) # Ensure debugging info at link time + endif() - IF(WITH_ASAN OR WITH_UBSAN) + if(WITH_ASAN OR WITH_UBSAN) # Workaround something Linux specific - SET(SECURITY_HARDENED 0 CACHE INTERNAL "" FORCE) - ENABLE_SANITIZERS() - ENDIF() - - IF(CLANG_CL) - SET(CLANG_CL_FLAGS -"-Wno-unknown-warning-option -Wno-unused-private-field \ --Wno-unused-parameter -Wno-inconsistent-missing-override \ --Wno-unused-command-line-argument -Wno-pointer-sign \ --Wno-deprecated-register -Wno-missing-braces \ --Wno-unused-function -Wno-unused-local-typedef -msse4.2 " + set(SECURITY_HARDENED 0 CACHE INTERNAL "" FORCE) + enable_sanitizers() + endif() + + add_compile_definitions( + _CRT_SECURE_NO_DEPRECATE + _CRT_NONSTDC_NO_WARNINGS + _WIN32_WINNT=0x0A00 + # We do not want the windows.h , or winsvc.h macros min/max + NOMINMAX NOSERVICE + # Speed up build process excluding unused header files + WIN32_LEAN_AND_MEAN + ) + if(CLANG_CL) + add_compile_options( + -Wno-unknown-warning-option + -Wno-unused-private-field + -Wno-unused-parameter + -Wno-inconsistent-missing-override + -Wno-unused-command-line-argument + -Wno-pointer-sign + -Wno-deprecated-register + -Wno-missing-braces + -Wno-unused-function + -Wno-unused-local-typedef + -Wno-microsoft-static-assert + -Wno-c++17-extensions + -msse4.2 ) - IF(CMAKE_SIZEOF_VOID_P MATCHES 8) - STRING(APPEND CLANG_CL_FLAGS "-mpclmul ") - ENDIF() - STRING(APPEND CMAKE_C_FLAGS " ${CLANG_CL_FLAGS} ${MSVC_CRT_TYPE}") - STRING(APPEND CMAKE_CXX_FLAGS " ${CLANG_CL_FLAGS} ${MSVC_CRT_TYPE}") - ENDIF() - - FOREACH(type EXE SHARED MODULE) - STRING(REGEX REPLACE "/STACK:([^ ]+)" "" CMAKE_${type}_LINKER_FLAGS "${CMAKE_${type}_LINKER_FLAGS}") - IF(WITH_ASAN) - SET(build_types RELWITHDEBINFO DEBUG) - ELSE() - SET(build_types RELWITHDEBINFO) - ENDIF() - FOREACH(btype ${build_types}) - STRING(REGEX REPLACE "/INCREMENTAL:([^ ]+)" "/INCREMENTAL:NO" 
CMAKE_${type}_LINKER_FLAGS_${btype} "${CMAKE_${type}_LINKER_FLAGS_${btype}}") - STRING(REGEX REPLACE "/INCREMENTAL$" "/INCREMENTAL:NO" CMAKE_${type}_LINKER_FLAGS_${btype} "${CMAKE_${type}_LINKER_FLAGS_${btype}}") - ENDFOREACH() - IF(NOT CLANG_CL) - STRING(APPEND CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO " /release /OPT:REF,ICF") - ENDIF() - IF(DYNAMIC_UCRT_LINK AND (MSVC_CRT_TYPE STREQUAL "/MT")) - FOREACH(config RELEASE RELWITHDEBINFO DEBUG MINSIZEREL) - STRING(APPEND CMAKE_${type}_LINKER_FLAGS_${config} ${DYNAMIC_UCRT_LINKER_OPTION}) - ENDFOREACH() - ENDIF() - ENDFOREACH() + if((CMAKE_SIZEOF_VOID_P MATCHES 8) AND MSVC_INTEL) + add_compile_options(-mpclmul) + endif() + endif() - # Mark 32 bit executables large address aware so they can # use > 2GB address space - IF(CMAKE_SIZEOF_VOID_P MATCHES 4) - STRING(APPEND CMAKE_EXE_LINKER_FLAGS " /LARGEADDRESSAWARE") - ENDIF() - - # Speed up multiprocessor build - IF (NOT CLANG_CL) - STRING(APPEND CMAKE_C_FLAGS " /MP") - STRING(APPEND CMAKE_CXX_FLAGS " /MP") - STRING(APPEND CMAKE_C_FLAGS_RELWITHDEBINFO " /Gw") - STRING(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " /Gw") - ENDIF() - - #TODO: update the code and remove the disabled warnings - STRING(APPEND CMAKE_C_FLAGS " /we4700 /we4311 /we4477 /we4302 /we4090") - STRING(APPEND CMAKE_CXX_FLAGS " /we4099 /we4700 /we4311 /we4477 /we4302 /we4090") - IF(MSVC_VERSION GREATER 1910 AND NOT CLANG_CL) - STRING(APPEND CMAKE_CXX_FLAGS " /permissive-") - STRING(APPEND CMAKE_C_FLAGS " /diagnostics:caret") - STRING(APPEND CMAKE_CXX_FLAGS " /diagnostics:caret") - ENDIF() - ADD_DEFINITIONS(-D_CRT_NONSTDC_NO_WARNINGS) - IF(MYSQL_MAINTAINER_MODE MATCHES "ERR") - STRING(APPEND CMAKE_C_FLAGS " /WX") - STRING(APPEND CMAKE_CXX_FLAGS " /WX") - FOREACH(type EXE SHARED MODULE) - FOREACH(cfg RELEASE DEBUG RELWITHDEBINFO) - SET(CMAKE_${type}_LINKER_FLAGS_${cfg} "${CMAKE_${type}_LINKER_FLAGS_${cfg}} /WX") - ENDFOREACH() - ENDFOREACH() - ENDIF() - - IF(FAST_BUILD) - STRING (REGEX REPLACE "/RTC(su|[1su])" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - ELSEIF (NOT CLANG_CL) - STRING(APPEND CMAKE_CXX_FLAGS_RELEASE " /d2OptimizeHugeFunctions") - STRING(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " /d2OptimizeHugeFunctions") - ENDIF() - ADD_COMPILE_OPTIONS($<$<COMPILE_LANGUAGE:CXX>:/utf-8>) -ENDIF() - -# Always link with socket/synchronization libraries -STRING(APPEND CMAKE_C_STANDARD_LIBRARIES " ws2_32.lib synchronization.lib") -STRING(APPEND CMAKE_CXX_STANDARD_LIBRARIES " ws2_32.lib synchronization.lib") - -# System checks -SET(SIGNAL_WITH_VIO_CLOSE 1) # Something that runtime team needs - -# IPv6 constants appeared in Vista SDK first. We need to define them in any case if they are -# not in headers, to handle dual mode sockets correctly. -CHECK_SYMBOL_EXISTS(IPPROTO_IPV6 "winsock2.h" HAVE_IPPROTO_IPV6) -IF(NOT HAVE_IPPROTO_IPV6) - SET(HAVE_IPPROTO_IPV6 41) -ENDIF() -CHECK_SYMBOL_EXISTS(IPV6_V6ONLY "winsock2.h;ws2ipdef.h" HAVE_IPV6_V6ONLY) -IF(NOT HAVE_IPV6_V6ONLY) - SET(IPV6_V6ONLY 27) -ENDIF() - -# Some standard functions exist there under different -# names (e.g popen is _popen or strok_r is _strtok_s) -# If a replacement function exists, HAVE_FUNCTION is -# defined to 1. CMake variable will also -# be defined to the replacement name. -# So for example, CHECK_FUNCTION_REPLACEMENT(popen _popen) -# will define HAVE_POPEN to 1 and set variable named popen -# to _popen.
If the header template, one needs to have -# cmakedefine popen @popen@ which will expand to -# define popen _popen after CONFIGURE_FILE - -MACRO(CHECK_FUNCTION_REPLACEMENT function replacement) - STRING(TOUPPER ${function} function_upper) - CHECK_FUNCTION_EXISTS(${function} HAVE_${function_upper}) - IF(NOT HAVE_${function_upper}) - CHECK_FUNCTION_EXISTS(${replacement} HAVE_${replacement}) - IF(HAVE_${replacement}) - SET(HAVE_${function_upper} 1 ) - SET(${function} ${replacement}) - ENDIF() - ENDIF() -ENDMACRO() -MACRO(CHECK_SYMBOL_REPLACEMENT symbol replacement header) - STRING(TOUPPER ${symbol} symbol_upper) - CHECK_SYMBOL_EXISTS(${symbol} ${header} HAVE_${symbol_upper}) - IF(NOT HAVE_${symbol_upper}) - CHECK_SYMBOL_EXISTS(${replacement} ${header} HAVE_${replacement}) - IF(HAVE_${replacement}) - SET(HAVE_${symbol_upper} 1) - SET(${symbol} ${replacement}) - ENDIF() - ENDIF() -ENDMACRO() + if(CMAKE_SIZEOF_VOID_P MATCHES 4) + add_link_options(/LARGEADDRESSAWARE) + endif() + + # RelWithDebInfo is deoptimized wrt inlining. + # Fix it to default + foreach(lang C CXX) + foreach(suffix "_RELWITHDEBINFO" "_RELWITHDEBINFO_INIT") + string(REGEX REPLACE "/Ob[0-1]" "" CMAKE_${lang}_FLAGS${suffix} "${CMAKE_${lang}_FLAGS${suffix}}") + endforeach() + endforeach() + + if(NOT CLANG_CL) + add_link_options("$<$<CONFIG:RelWithDebInfo>:/INCREMENTAL:NO;/RELEASE;/OPT:REF,ICF>") + add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:$<$<CONFIG:RelWithDebInfo>:/Gw>>) + add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/MP>) + add_compile_options("$<$<COMPILE_LANGUAGE:C,CXX>:/we4099;/we4700;/we4311;/we4477;/we4302;/we4090>") + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/permissive->) + add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/diagnostics:caret>) + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/utf-8>) + if(NOT FAST_BUILD) + add_compile_options($<$<COMPILE_LANGUAGE:CXX>:$<$<CONFIG:Release,RelWithDebInfo>:/d2OptimizeHugeFunctions>>) + endif() + endif() + + if(MYSQL_MAINTAINER_MODE MATCHES "ERR") + set(CMAKE_COMPILE_WARNING_AS_ERROR ON) + add_link_options(/WX) + endif() +endif() + +# avoid running system checks by using pre-cached check results + +# system checks are expensive on VS generator +get_filename_component(_SCRIPT_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) +include(${_SCRIPT_DIR}/WindowsCache.cmake) + +# this is out of place, not really a system check +set(FN_NO_CASE_SENSE 1) +set(USE_SYMDIR 1) +set(HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT 1) -CHECK_SYMBOL_REPLACEMENT(S_IROTH _S_IREAD sys/stat.h) -CHECK_SYMBOL_REPLACEMENT(S_IFIFO _S_IFIFO sys/stat.h) -CHECK_SYMBOL_REPLACEMENT(SIGQUIT SIGTERM signal.h) -CHECK_SYMBOL_REPLACEMENT(SIGPIPE SIGINT signal.h) -CHECK_FUNCTION_REPLACEMENT(popen _popen) -CHECK_FUNCTION_REPLACEMENT(pclose _pclose) -CHECK_FUNCTION_REPLACEMENT(access _access) -CHECK_FUNCTION_REPLACEMENT(strcasecmp _stricmp) -CHECK_FUNCTION_REPLACEMENT(strncasecmp _strnicmp) -CHECK_SYMBOL_REPLACEMENT(snprintf _snprintf stdio.h) -CHECK_FUNCTION_REPLACEMENT(strtok_r strtok_s) -CHECK_FUNCTION_REPLACEMENT(strtoll _strtoi64) -CHECK_FUNCTION_REPLACEMENT(strtoull _strtoui64) -CHECK_FUNCTION_REPLACEMENT(vsnprintf _vsnprintf) -CHECK_TYPE_SIZE(ssize_t SIZE_OF_SSIZE_T) -IF(NOT HAVE_SIZE_OF_SSIZE_T) - SET(ssize_t SSIZE_T) -ENDIF() - -SET(FN_NO_CASE_SENSE 1) -SET(USE_SYMDIR 1) - -# Force static C runtime for targets in current directory -# (useful to get rid of MFC dll's dependency, or in installer) -MACRO(FORCE_STATIC_CRT) - FOREACH(flag - CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_DEBUG_INIT - CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_DEBUG_INIT - CMAKE_C_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_MINSIZEREL - ) - STRING(REGEX REPLACE "/MD[d]?"
"/MT" "${flag}" "${${flag}}" ) - STRING(REPLACE "${DYNAMIC_UCRT_LINKER_OPTION}" "" "${flag}" "${${flag}}") - ENDFOREACH() -ENDMACRO() diff -Nru mariadb-10.11.11/cmake/os/WindowsCache.cmake mariadb-10.11.13/cmake/os/WindowsCache.cmake --- mariadb-10.11.11/cmake/os/WindowsCache.cmake 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/cmake/os/WindowsCache.cmake 2025-05-19 16:14:24.000000000 +0000 @@ -203,10 +203,10 @@ SET(HAVE_STRNDUP CACHE INTERNAL "") SET(HAVE_STRNLEN 1 CACHE INTERNAL "") SET(HAVE_STRPBRK 1 CACHE INTERNAL "") -SET(HAVE_STRTOK_R CACHE INTERNAL "") -SET(HAVE_STRTOLL CACHE INTERNAL "") +SET(HAVE_STRTOK_R 1 CACHE INTERNAL "") +SET(HAVE_STRTOLL 1 CACHE INTERNAL "") SET(HAVE_STRTOUL 1 CACHE INTERNAL "") -SET(HAVE_STRTOULL CACHE INTERNAL "") +SET(HAVE_STRTOULL 1 CACHE INTERNAL "") SET(HAVE_SYNCH_H CACHE INTERNAL "") SET(HAVE_SYSENT_H CACHE INTERNAL "") SET(HAVE_SYS_DIR_H CACHE INTERNAL "") @@ -294,6 +294,7 @@ SET(HAVE_LINUX_UNISTD_H CACHE INTERNAL "") SET(HAVE_SYS_UTSNAME_H CACHE INTERNAL "") SET(HAVE_PTHREAD_ATTR_GETGUARDSIZE CACHE INTERNAL "") +SET(HAVE_PTHREAD_GETATTR_NP CACHE INTERNAL "") SET(HAVE_SOCKPEERCRED CACHE INTERNAL "") SET(HAVE_ABI_CXA_DEMANGLE CACHE INTERNAL "") SET(HAVE_GCC_C11_ATOMICS CACHE INTERNAL "") @@ -348,4 +349,16 @@ SET(HAVE_GETPAGESIZES CACHE INTERNAL "") SET(HAVE_LINUX_LIMITS_H CACHE INTERNAL "") SET(HAVE_FILE_UCONTEXT_H CACHE INTERNAL "") +SET(have_C__Werror CACHE INTERNAL "") +SET(HAVE_SIGNAL_H 1 CACHE INTERNAL "") +SET(HAVE_UINT CACHE INTERNAL "") +SET(HAVE_SOCKET_LEN_T CACHE INTERNAL "") +SET(HAVE_GETTHRID CACHE INTERNAL "") +SET(HAVE_THREAD_LOCAL 1 CACHE INTERNAL "") +SET(have_CXX__Wno_unused_but_set_variable CACHE INTERNAL "") +SET(HAVE_UNISTD_H CACHE INTERNAL "") +SET(HAVE_LINUX_UNISTD_H CACHE INTERNAL "") +SET(OFF64_T CACHE INTERNAL "") +SET(Z_HAVE_UNISTD_H CACHE INTERNAL "") +SET(HAVE_OFF64_T CACHE FALSE INTERNAL "") ENDIF(MSVC) diff -Nru mariadb-10.11.11/cmake/pcre.cmake mariadb-10.11.13/cmake/pcre.cmake --- mariadb-10.11.11/cmake/pcre.cmake 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/cmake/pcre.cmake 2025-05-19 16:14:24.000000000 +0000 @@ -54,11 +54,18 @@ ENDIF() ENDFOREACH() + IF(CMAKE_MSVC_RUNTIME_LIBRARY) + SET(CMAKE_MSVC_RUNTIME_LIBRARY_ARG + "-DCMAKE_MSVC_RUNTIME_LIBRARY=${CMAKE_MSVC_RUNTIME_LIBRARY}") + ELSE() + SET(CMAKE_MSVC_RUNTIME_LIBRARY_ARG) + ENDIF() + ExternalProject_Add( pcre2 PREFIX "${dir}" - URL "https://github.com/PCRE2Project/pcre2/releases/download/pcre2-10.44/pcre2-10.44.zip" - URL_MD5 dfab8313154b3377a6959c3b6377841e + URL "https://github.com/PCRE2Project/pcre2/releases/download/pcre2-10.45/pcre2-10.45.zip" + URL_MD5 873da56c6469ec207ca5c5ae9688b83a INSTALL_COMMAND "" CMAKE_ARGS "-DCMAKE_WARN_DEPRECATED=FALSE" @@ -72,6 +79,7 @@ "-DCMAKE_C_FLAGS_RELEASE=${pcre2_flags_RELEASE}" "-DCMAKE_C_FLAGS_MINSIZEREL=${pcre2_flags_MINSIZEREL}" "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + ${CMAKE_MSVC_RUNTIME_LIBRARY_ARG} ${stdlibs} ${byproducts} ) diff -Nru mariadb-10.11.11/cmake/plugin.cmake mariadb-10.11.13/cmake/plugin.cmake --- mariadb-10.11.11/cmake/plugin.cmake 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/cmake/plugin.cmake 2025-05-19 16:14:24.000000000 +0000 @@ -214,6 +214,11 @@ TARGET_LINK_LIBRARIES (${target} mysqlservices ${ARG_LINK_LIBRARIES}) + IF(WIN32) + # A popular library, turns out many plugins need it for gethostname() + TARGET_LINK_LIBRARIES (${target} ws2_32) + ENDIF() + IF(CMAKE_SYSTEM_NAME MATCHES AIX) TARGET_LINK_OPTIONS(${target} PRIVATE 
"-Wl,-bE:${CMAKE_SOURCE_DIR}/libservices/mysqlservices_aix.def") ENDIF() diff -Nru mariadb-10.11.11/config.h.cmake mariadb-10.11.13/config.h.cmake --- mariadb-10.11.11/config.h.cmake 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/config.h.cmake 2025-05-19 16:14:24.000000000 +0000 @@ -402,38 +402,27 @@ #cmakedefine SIGNAL_WITH_VIO_CLOSE 1 /* Windows stuff, mostly functions, that have Posix analogs but named differently */ -#cmakedefine S_IROTH @S_IROTH@ -#cmakedefine S_IFIFO @S_IFIFO@ -#cmakedefine IPPROTO_IPV6 @IPPROTO_IPV6@ -#cmakedefine IPV6_V6ONLY @IPV6_V6ONLY@ -#cmakedefine sigset_t @sigset_t@ -#cmakedefine mode_t @mode_t@ -#cmakedefine SIGQUIT @SIGQUIT@ -#cmakedefine SIGPIPE @SIGPIPE@ -#cmakedefine popen @popen@ -#cmakedefine pclose @pclose@ -#cmakedefine ssize_t @ssize_t@ -#cmakedefine strcasecmp @strcasecmp@ -#cmakedefine strncasecmp @strncasecmp@ -#cmakedefine snprintf @snprintf@ -#cmakedefine strtok_r @strtok_r@ -#cmakedefine strtoll @strtoll@ -#cmakedefine strtoull @strtoull@ -#cmakedefine vsnprintf @vsnprintf@ -#if defined(_MSC_VER) && (_MSC_VER > 1800) +#ifdef _WIN32 +#define S_IROTH _S_IREAD +#define S_IFIFO _S_IFIFO +#define SIGQUIT SIGTERM +#define SIGPIPE SIGINT +#define sigset_t int +#define mode_t int +#define popen _popen +#define pclose _pclose +#define ssize_t SSIZE_T +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#define strtok_r strtok_s #define tzname _tzname #define P_tmpdir "C:\\TEMP" -#endif -#if defined(_MSC_VER) && (_MSC_VER > 1310) -# define HAVE_SETENV #define setenv(a,b,c) _putenv_s(a,b) -#endif -#define PSAPI_VERSION 1 /* for GetProcessMemoryInfo() */ -/* We don't want the min/max macros */ -#ifdef _WIN32 +#define HAVE_SETENV #define NOMINMAX 1 -#endif +#define PSAPI_VERSION 2 /* for GetProcessMemoryInfo() */ +#endif /* _WIN32 */ /* MySQL features @@ -457,6 +446,11 @@ /* This should mean case insensitive file system */ #cmakedefine FN_NO_CASE_SENSE 1 +/* Whether an anonymous private mapping is unaccessible after +madvise(MADV_DONTNEED) or madvise(MADV_FREE) or similar has been invoked; +this is the case with Microsoft Windows VirtualFree(MEM_DECOMMIT) */ +#cmakedefine HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT 1 + #cmakedefine HAVE_CHARSET_armscii8 1 #cmakedefine HAVE_CHARSET_ascii 1 #cmakedefine HAVE_CHARSET_big5 1 diff -Nru mariadb-10.11.11/debian/changelog mariadb-10.11.13/debian/changelog --- mariadb-10.11.11/debian/changelog 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/changelog 2025-05-23 21:26:02.000000000 +0000 @@ -1,3 +1,39 @@ +mariadb (1:10.11.13-0+deb12u1) bookworm; urgency=medium + + * New upstream version 10.11.13. Includes fixes for several severe regressions + as noted at https://mariadb.com/kb/en/mariadb-10-11-13-release-notes/, which + were discovered soon after the 10.11.12 release, which was skipped in Debian + intentionally. 
+ * This release includes upstream version 10.11.12, with fixes for regressions + as noted at https://mariadb.com/kb/en/mariadb-10-11-12-release-notes/ as + well as security issues (Closes: #1100437, #1105976): + - CVE-2023-52969 + - CVE-2023-52970 + - CVE-2023-52971 + - CVE-2025-30693 + - CVE-2025-30722 + * Drop all RocksDB patches now upstream due to update to version 6.29fb + * New upstream version has now CEST as allowed in main.timezone test + (Closes: #1084293) + * New upstream includes systemd service fix for restarts on crashes + (Closes: #1073847) + * New upstream also fixes regression in INSERT SELECT on NOT NULL columns + while having BEFORE UPDATE trigger (Closes: #1099515) + * Revert "Set CAP_IPC_LOCK capability if possible" because of MDEV-36229 + (Closes: #1100575) + * Update configuration traces to have --ssl-verify-server-cert from MDEV-28908 + * Update configuration traces to include new upstream system variables: + - innodb-buffer-pool-size-auto-min (default: 0) + - innodb-buffer-pool-size-max (default: 0) + - innodb-log-checkpoint-now (default: FALSE) + * Also update configuration traces to match that in 10.11.12 the variables + innodb-buffer-pool-chunk-size and innodb-log-spin-wait-delay are advertised + as deprecated. + * Fix changelog entry formatting in 1:10.11.11-0+deb12u1 + * Salsa CI: Adapt piuparts helper script to new source format in Bookworm + + -- Otto Kekäläinen Fri, 23 May 2025 14:26:02 -0700 + mariadb (1:10.11.11-0+deb12u1) bookworm; urgency=medium [ Otto Kekäläinen ] @@ -27,7 +63,8 @@ unstable in MariaDB 11.4 for a long time, and which are likely needed to avoid occasional shutdown issues, in particular on upgrades (LP: #2034125) in both Debian and Ubuntu - - Make SysV init more verbose in case of MariaDB start failures (Related: #1033234) + - Make SysV init more verbose in case of MariaDB start failures + (Related: #1033234) - Limit check of running mysqld/mariadbd to system users (Closes: #1032047) - When shutting down 'mariadbd', fallback to 'mysqld' * Add Lintian overrides for new upstream documentation JavaScript files diff -Nru mariadb-10.11.11/debian/mariadb-server-core.postinst mariadb-10.11.13/debian/mariadb-server-core.postinst --- mariadb-10.11.11/debian/mariadb-server-core.postinst 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/mariadb-server-core.postinst 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ -#!/bin/bash -set -e - -# shellcheck source=/dev/null -. /usr/share/debconf/confmodule - -if [ -n "$DEBIAN_SCRIPT_DEBUG" ] -then - set -v -x - DEBIAN_SCRIPT_TRACE=1 -fi - -${DEBIAN_SCRIPT_TRACE:+ echo "#42#DEBUG# RUNNING $0 $*" 1>&2} - -export PATH=$PATH:/sbin:/usr/sbin:/bin:/usr/bin - -# inspired by iputils-ping - -# -# cap_ipc_lock is required if a user wants to use --memlock -# and has insufficient RLIMIT_MEMLOCK (MDEV-33301) - -PROGRAM=$(dpkg-divert --truename /usr/sbin/mysqld) - -case "$1" in - configure) - # If we have setcap installed, try setting - # which allows us to install our binaries without the setuid - # bit. - if command -v setcap > /dev/null - then - if !
setcap cap_ipc_lock+ep "$PROGRAM" - then - echo "Setcap failed on $PROGRAM, required with --memlock if insufficient RLIMIT_MEMLOCK" >&2 - fi - fi - ;; - - abort-upgrade|abort-remove|abort-configure|triggered) - ;; - - *) - echo "postinst called with unknown argument '$1'" 1>&2 - exit 1 - ;; -esac - -db_stop # in case invoke fails - -#DEBHELPER# diff -Nru mariadb-10.11.11/debian/patches/fix-reproducible-builds-rocksdb.patch mariadb-10.11.13/debian/patches/fix-reproducible-builds-rocksdb.patch --- mariadb-10.11.11/debian/patches/fix-reproducible-builds-rocksdb.patch 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/patches/fix-reproducible-builds-rocksdb.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -Origin: https://github.com/facebook/rocksdb/commit/0a9a05ae12943b1529ef1eabbca5ce5a71c986bf -# Merged in RocksDB 6.19.3, but not updated into MariaDB yet -Bug: https://github.com/facebook/rocksdb/issues/7035 -Author: Otto Kekäläinen -Subject: Make RocksDB build reproducible - -The RocksDB binary included a string with the build timestamp: -> rocksdb_build_git_date:@2021-05-23·16:04:38@ - -As this changes from build to build, it makes the builds unreproducible. -Simply removing it solves the issue. - -This temporary fix can be removed when a proper fix already done in upstream -lands in MariaDB when the RocksDB submodule is updated to a newer release. - ---- a/storage/rocksdb/rocksdb/util/build_version.cc.in -+++ b/storage/rocksdb/rocksdb/util/build_version.cc.in -@@ -1,5 +1,5 @@ - // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - #include "build_version.h" --const char* rocksdb_build_git_sha = "rocksdb_build_git_sha:@@GIT_SHA@@"; --const char* rocksdb_build_git_date = "rocksdb_build_git_date:@@GIT_DATE_TIME@@"; --const char* rocksdb_build_compile_date = __DATE__; -+const char* rocksdb_build_git_sha = "rocksdb_build_git_sha:REDACTED"; -+const char* rocksdb_build_git_date = "rocksdb_build_git_date:REDACTED"; -+const char* rocksdb_build_compile_date = "REDACTED"; diff -Nru mariadb-10.11.11/debian/patches/fix-spelling-rocksdb.patch mariadb-10.11.13/debian/patches/fix-spelling-rocksdb.patch --- mariadb-10.11.11/debian/patches/fix-spelling-rocksdb.patch 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/patches/fix-spelling-rocksdb.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -Forwarded: https://github.com/facebook/rocksdb/pull/9653 -Origin: https://patch-diff.githubusercontent.com/raw/facebook/rocksdb/pull/9653.patch -From: Otto Kekäläinen -Date: Wed, 2 Mar 2022 18:13:18 -0800 -Subject: Fix various spelling errors still found in code - Two upstream PRs remain that have been merged, but not imported on MariaDB yet.
- ---- a/storage/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc -+++ b/storage/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc -@@ -46,7 +46,7 @@ - TablePropertiesCollectorFactory::Context::kUnknownColumnFamily && - f.cf_id != cfd_->GetID()) { - return Status::InvalidArgument( -- "External file column family id dont match"); -+ "External file column family id don't match"); - } - } - -@@ -646,7 +646,7 @@ - return Status::InvalidArgument("Global seqno is required, but disabled"); - } else if (file_to_ingest->global_seqno_offset == 0) { - return Status::InvalidArgument( -- "Trying to set global seqno for a file that dont have a global seqno " -+ "Trying to set global seqno for a file that don't have a global seqno " - "field"); - } - ---- a/storage/rocksdb/rocksdb/include/rocksdb/cache.h -+++ b/storage/rocksdb/rocksdb/include/rocksdb/cache.h -@@ -60,7 +60,7 @@ - // If greater than zero, the LRU list will be split into a high-pri - // list and a low-pri list. High-pri entries will be insert to the - // tail of high-pri list, while low-pri entries will be first inserted to -- // the low-pri list (the midpoint). This is refered to as -+ // the low-pri list (the midpoint). This is referred to as - // midpoint insertion strategy to make entries never get hit in cache - // age out faster. - // diff -Nru mariadb-10.11.11/debian/patches/rocksdb-kfreebsd.patch mariadb-10.11.13/debian/patches/rocksdb-kfreebsd.patch --- mariadb-10.11.11/debian/patches/rocksdb-kfreebsd.patch 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/patches/rocksdb-kfreebsd.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,150 +0,0 @@ -Forwarded: https://github.com/facebook/rocksdb/pull/6992 -From: Andrew Kryczka -Date: Tue, 16 Jun 2020 19:34:21 -0700 -# Merged in RocksDB 6.13.fb, but not updated into MariaDB yet -Bug: https://jira.mariadb.org/browse/MDEV-19251 -Description: - Upstream has merged this but we still need to wait for it to be included - in a RocksDB release and imported into MariaDB and then into Debian. ---- a/storage/rocksdb/build_rocksdb.cmake -+++ b/storage/rocksdb/build_rocksdb.cmake -@@ -90,6 +90,8 @@ - add_definitions(-DOS_LINUX) - elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") - add_definitions(-DOS_SOLARIS) -+elseif(CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") -+ add_definitions(-DOS_GNU_KFREEBSD) - elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - add_definitions(-DOS_FREEBSD) - elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD") ---- a/storage/rocksdb/rocksdb/CMakeLists.txt -+++ b/storage/rocksdb/rocksdb/CMakeLists.txt -@@ -91,7 +91,7 @@ - option(WITH_XPRESS "build with windows built in compression" OFF) - include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc) - else() -- if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") -+ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") - # FreeBSD has jemalloc as default malloc - # but it does not have all the jemalloc files in include/... 
- set(WITH_JEMALLOC ON) -@@ -413,6 +413,8 @@ - add_definitions(-DOS_LINUX) - elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") - add_definitions(-DOS_SOLARIS) -+elseif(CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") -+ add_definitions(-DOS_GNU_KFREEBSD) - elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - add_definitions(-DOS_FREEBSD) - elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD") ---- a/storage/rocksdb/rocksdb/build_tools/build_detect_platform -+++ b/storage/rocksdb/rocksdb/build_tools/build_detect_platform -@@ -190,6 +190,17 @@ - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" - # PORT_FILES=port/freebsd/freebsd_specific.cc - ;; -+ GNU/kFreeBSD) -+ PLATFORM=OS_GNU_KFREEBSD -+ COMMON_FLAGS="$COMMON_FLAGS -DOS_GNU_KFREEBSD" -+ if [ -z "$USE_CLANG" ]; then -+ COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" -+ else -+ PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" -+ fi -+ PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" -+ # PORT_FILES=port/gnu_kfreebsd/gnu_kfreebsd_specific.cc -+ ;; - NetBSD) - PLATFORM=OS_NETBSD - COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" ---- a/storage/rocksdb/rocksdb/env/env_posix.cc -+++ b/storage/rocksdb/rocksdb/env/env_posix.cc -@@ -41,7 +41,7 @@ - #include - #include - // Get nano time includes --#if defined(OS_LINUX) || defined(OS_FREEBSD) -+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) - #elif defined(__MACH__) - #include - #include -@@ -287,7 +287,8 @@ - } - - uint64_t NowNanos() override { --#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_AIX) -+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \ -+ defined(OS_AIX) - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec; -@@ -307,8 +308,8 @@ - } - - uint64_t NowCPUNanos() override { --#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_AIX) || \ -- (defined(__MACH__) && defined(__MAC_10_12)) -+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \ -+ defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12)) - struct timespec ts; - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); - return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec; ---- a/storage/rocksdb/rocksdb/port/stack_trace.cc -+++ b/storage/rocksdb/rocksdb/port/stack_trace.cc -@@ -32,7 +32,7 @@ - - namespace { - --#if defined(OS_LINUX) || defined(OS_FREEBSD) -+#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) - const char* GetExecutableName() { - static char name[1024]; - ---- a/storage/rocksdb/rdb_io_watchdog.h -+++ b/storage/rocksdb/rdb_io_watchdog.h -@@ -56,19 +56,19 @@ - int stop_timers() { - int ret = 0; - -- if (m_io_check_watchdog_timer) { -+ if (m_io_check_watchdog_timer != reinterpret_cast<timer_t>(-1)) { - ret = timer_delete(m_io_check_watchdog_timer); - - if (!ret) { -- m_io_check_watchdog_timer = nullptr; -+ m_io_check_watchdog_timer = reinterpret_cast<timer_t>(-1); - } - } - -- if (m_io_check_timer && !ret) { -+ if (m_io_check_timer != reinterpret_cast<timer_t>(-1) && !ret) { - ret = timer_delete(m_io_check_timer); - - if (!ret) { -- m_io_check_timer = nullptr; -+ m_io_check_timer = reinterpret_cast<timer_t>(-1); - } - } - -@@ -93,8 +93,8 @@ - - public: - explicit Rdb_io_watchdog(std::vector<std::string> &&directories) -- : m_io_check_timer(nullptr), -- m_io_check_watchdog_timer(nullptr), -+ : m_io_check_timer(reinterpret_cast<timer_t>(-1)), -+ m_io_check_watchdog_timer(reinterpret_cast<timer_t>(-1)), - m_io_in_progress(false), - m_dirs_to_check(std::move(directories)), - m_buf(nullptr) { ---
a/storage/rocksdb/rdb_io_watchdog.cc -+++ b/storage/rocksdb/rdb_io_watchdog.cc -@@ -111,7 +111,7 @@ - sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno); - } - -- m_io_check_watchdog_timer = nullptr; -+ m_io_check_watchdog_timer = reinterpret_cast<timer_t>(-1); - - RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); - } diff -Nru mariadb-10.11.11/debian/patches/series mariadb-10.11.13/debian/patches/series --- mariadb-10.11.11/debian/patches/series 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/patches/series 2025-05-23 21:26:02.000000000 +0000 @@ -1,5 +1,2 @@ -rocksdb-kfreebsd.patch env-perl-usr-bin-perl.patch -fix-spelling-rocksdb.patch -fix-reproducible-builds-rocksdb.patch mroonga-mrn-lib-dirs-path-reproducible-build.patch diff -Nru mariadb-10.11.11/debian/salsa-ci-enable-sec-and-update-repos.sh mariadb-10.11.13/debian/salsa-ci-enable-sec-and-update-repos.sh --- mariadb-10.11.11/debian/salsa-ci-enable-sec-and-update-repos.sh 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/salsa-ci-enable-sec-and-update-repos.sh 2025-05-23 21:26:02.000000000 +0000 @@ -1,10 +1,14 @@ #!/bin/sh -set -x -set -e +echo "Running salsa-ci-enable-sec-and-update-repos.sh to enable the same" +echo "repositories that were available at build time in e.g." +echo "registry.salsa.debian.org/salsa-ci-team/pipeline/base:bullseye" + +# Debug what repositories are available to begin with +head /etc/apt/sources.list /etc/apt/sources.list.d/* || true -# Debug what repositories are available to begin with -grep -r "^deb " /etc/apt/sources.* +# Fail on non-zero exit codes from this point onward +set -e # Enable the same repositories that were available at build time in # registry.salsa.debian.org/salsa-ci-team/pipeline/base:bullseye diff -Nru mariadb-10.11.11/debian/salsa-ci.yml mariadb-10.11.13/debian/salsa-ci.yml --- mariadb-10.11.11/debian/salsa-ci.yml 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/salsa-ci.yml 2025-05-23 21:26:02.000000000 +0000 @@ -24,10 +24,17 @@ # For unknown reason Lintian v2.116.3 in Bookworm errors on valid changelog entry SALSA_CI_LINTIAN_SUPPRESS_TAGS: 'bad-distribution-in-changes-file' -# Extend Salsa-CI build jobs to have longer timeout as the default GitLab -# timeout (1h) is often not enough .build-package: + # Extend Salsa CI build jobs to have longer timeout as the default GitLab + # timeout (1h) is often not enough timeout: 3h + # Default 5G sporadically fails builds on not having enough disk space + variables: + CCACHE_MAXSIZE: 3G + # Salsa instance runners typically have 30G volumes with 14G free disk space + before_script: + - echo "Total and free disk space:" + - df -h . stages: - provisioning diff -Nru mariadb-10.11.11/debian/tests/traces/mariadb-verbose-help.expected mariadb-10.11.13/debian/tests/traces/mariadb-verbose-help.expected --- mariadb-10.11.11/debian/tests/traces/mariadb-verbose-help.expected 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/tests/traces/mariadb-verbose-help.expected 2025-05-23 21:26:02.000000000 +0000 @@ -156,9 +156,8 @@ --ssl-crlpath=name Certificate revocation list path (implies --ssl). --tls-version=name TLS protocol version for secure connection. --ssl-verify-server-cert - Verify server's "Common Name" in its cert against - hostname used when connecting. This option is disabled by - default. + Verify server's certificate to prevent man-in-the-middle + attacks -t, --table Output in table format. --tee=name Append everything into outfile. See interactive help (\h) also.
Does not work in batch mode. Disable with diff -Nru mariadb-10.11.11/debian/tests/traces/mariadbd-verbose-help.expected mariadb-10.11.13/debian/tests/traces/mariadbd-verbose-help.expected --- mariadb-10.11.11/debian/tests/traces/mariadbd-verbose-help.expected 2025-02-19 00:56:41.000000000 +0000 +++ mariadb-10.11.13/debian/tests/traces/mariadbd-verbose-help.expected 2025-05-23 21:26:02.000000000 +0000 @@ -575,9 +575,7 @@ FORCE_PLUS_PERMANENT (like FORCE, but the plugin can not be uninstalled). --innodb-buffer-pool-chunk-size=# - Size of a single memory chunk for resizing buffer pool. - Online buffer pool resizing happens at this granularity. - 0 means autosize this variable based on buffer pool size. + Deprecated parameter with no effect --innodb-buffer-pool-dump-at-shutdown Dump the buffer pool into a file named @@innodb_buffer_pool_filename @@ -603,6 +601,11 @@ --innodb-buffer-pool-size=# The size of the memory buffer InnoDB uses to cache data and indexes of its tables. + --innodb-buffer-pool-size-auto-min=# + Minimum innodb_buffer_pool_size for dynamic shrinking on + memory pressure + --innodb-buffer-pool-size-max=# + Maximum innodb_buffer_pool_size --innodb-buffer-pool-stats[=name] Enable or disable INNODB_BUFFER_POOL_STATS plugin. One of: ON, OFF, FORCE (don't start if the plugin fails to @@ -883,6 +886,9 @@ be uninstalled). --innodb-log-buffer-size=# Redo log buffer size in bytes. + --innodb-log-checkpoint-now + Write back dirty pages from the buffer pool and update + the log checkpoint --innodb-log-file-buffering Whether the file system cache for ib_logfile0 is enabled --innodb-log-file-mmap @@ -894,8 +900,7 @@ --innodb-log-group-home-dir=name Path to ib_logfile0 --innodb-log-spin-wait-delay[=#] - Delay between log buffer spin lock polls (0 to use a - blocking latch) + Deprecated parameter with no effect --innodb-log-write-ahead-size=# Redo log write size to avoid read-on-write; must be a power of two @@ -1449,7 +1454,8 @@ keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in - selectivity_for_indexes. selectivity_multiplier. This + selectivity_for_indexes. fix_derived_table_read_cost = + Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. Use 'ALL' to set all combinations. @@ -2611,6 +2617,8 @@ innodb-buffer-pool-load-at-startup TRUE innodb-buffer-pool-load-now FALSE innodb-buffer-pool-size 134217728 +innodb-buffer-pool-size-auto-min 0 +innodb-buffer-pool-size-max 0 innodb-buffer-pool-stats ON innodb-change-buffer-max-size 25 innodb-change-buffering none @@ -2685,6 +2693,7 @@ innodb-lock-waits ON innodb-locks ON innodb-log-buffer-size 16777216 +innodb-log-checkpoint-now FALSE innodb-log-file-buffering FALSE innodb-log-file-mmap TRUE innodb-log-file-size 100663296 diff -Nru mariadb-10.11.11/extra/mariabackup/backup_mysql.cc mariadb-10.11.13/extra/mariabackup/backup_mysql.cc --- mariadb-10.11.11/extra/mariabackup/backup_mysql.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/extra/mariabackup/backup_mysql.cc 2025-05-19 16:14:24.000000000 +0000 @@ -1893,7 +1893,7 @@ srv_log_file_size, srv_page_size, srv_undo_dir, - (uint) srv_undo_tablespaces, + srv_undo_tablespaces, page_zip_level, innobase_buffer_pool_filename ? 
"innodb_buffer_pool_filename=" : "", diff -Nru mariadb-10.11.11/extra/mariabackup/common_engine.cc mariadb-10.11.13/extra/mariabackup/common_engine.cc --- mariadb-10.11.11/extra/mariabackup/common_engine.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/extra/mariabackup/common_engine.cc 2025-05-19 16:14:24.000000000 +0000 @@ -64,8 +64,10 @@ for (const auto &fname : m_fnames) { File file = mysql_file_open(0, fname.c_str(),O_RDONLY | O_SHARE, MYF(0)); if (file < 0) { - msg(thread_num, "Error on file %s open during %s table copy", - fname.c_str(), full_tname.c_str()); + char buf[MYSYS_STRERROR_SIZE]; + msg(thread_num, "Error %i on file %s open during %s table copy: %s", + errno, fname.c_str(), full_tname.c_str(), + my_strerror(buf, sizeof(buf), errno)); goto exit; } files.push_back(file); diff -Nru mariadb-10.11.11/extra/mariabackup/innobackupex.cc mariadb-10.11.13/extra/mariabackup/innobackupex.cc --- mariadb-10.11.11/extra/mariabackup/innobackupex.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/extra/mariabackup/innobackupex.cc 2025-05-19 16:14:24.000000000 +0000 @@ -44,8 +44,8 @@ #include #include #include -#include #include +#include "buf0buf.h" #include #include #include @@ -594,8 +594,9 @@ "--apply-log.", (uchar*) &ibx_xtrabackup_use_memory, (uchar*) &ibx_xtrabackup_use_memory, - 0, GET_LL, REQUIRED_ARG, 100*1024*1024L, 1024*1024L, LONGLONG_MAX, 0, - 1024*1024L, 0}, + 0, GET_LL, REQUIRED_ARG, 96 << 20, + innodb_buffer_pool_extent_size, SIZE_T_MAX, 0, + innodb_buffer_pool_extent_size, 0}, {"innodb-force-recovery", OPT_INNODB_FORCE_RECOVERY, "This option starts up the embedded InnoDB instance in crash " diff -Nru mariadb-10.11.11/extra/mariabackup/write_filt.cc mariadb-10.11.13/extra/mariabackup/write_filt.cc --- mariadb-10.11.11/extra/mariabackup/write_filt.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/extra/mariabackup/write_filt.cc 2025-05-19 16:14:24.000000000 +0000 @@ -144,18 +144,6 @@ return false; } - /* Check whether TRX_SYS page has been changed */ - if (mach_read_from_4(page + FIL_PAGE_SPACE_ID) - == TRX_SYS_SPACE - && mach_read_from_4(page + FIL_PAGE_OFFSET) - == TRX_SYS_PAGE_NO) { - msg(cursor->thread_n, - "--incremental backup is impossible if " - "the server had been restarted with " - "different innodb_undo_tablespaces."); - return false; - } - /* updated page */ if (cp->npages == page_size / 4) { /* flush buffer */ diff -Nru mariadb-10.11.11/extra/mariabackup/xtrabackup.cc mariadb-10.11.13/extra/mariabackup/xtrabackup.cc --- mariadb-10.11.11/extra/mariabackup/xtrabackup.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/extra/mariabackup/xtrabackup.cc 2025-05-19 16:14:24.000000000 +0000 @@ -201,8 +201,6 @@ xb_filter_entry_t *name_hash; }; -lsn_t checkpoint_lsn_start; -lsn_t checkpoint_no_start; /** whether log_copying_thread() is active; protected by recv_sys.mutex */ static bool log_copying_running; /** for --backup, target LSN to copy the log to; protected by recv_sys.mutex */ @@ -1383,6 +1381,7 @@ OPT_XTRA_MYSQLD_ARGS, OPT_XB_IGNORE_INNODB_PAGE_CORRUPTION, OPT_INNODB_FORCE_RECOVERY, + OPT_INNODB_CHECKPOINT, OPT_ARIA_LOG_DIR_PATH }; @@ -1414,8 +1413,9 @@ "The value is used in place of innodb_buffer_pool_size. 
" "This option is only relevant when the --prepare option is specified.", (G_PTR *) &xtrabackup_use_memory, (G_PTR *) &xtrabackup_use_memory, 0, - GET_LL, REQUIRED_ARG, 100 * 1024 * 1024L, 1024 * 1024L, LONGLONG_MAX, 0, - 1024 * 1024L, 0}, + GET_ULL, REQUIRED_ARG, 96 << 20, innodb_buffer_pool_extent_size, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), + 0, innodb_buffer_pool_extent_size, 0}, {"throttle", OPT_XTRA_THROTTLE, "limit count of IO operations (pairs of read&write) per second to IOS " "values (for '--backup')", @@ -1787,10 +1787,7 @@ static const char *dbug_option; #endif -#ifdef HAVE_URING -extern const char *io_uring_may_be_unsafe; -bool innodb_use_native_aio_default(); -#endif +static my_bool innodb_log_checkpoint_now; struct my_option xb_server_options[] = { @@ -1927,12 +1924,7 @@ "Use native AIO if supported on this platform.", (G_PTR*) &srv_use_native_aio, (G_PTR*) &srv_use_native_aio, 0, GET_BOOL, NO_ARG, -#ifdef HAVE_URING - innodb_use_native_aio_default(), -#else - TRUE, -#endif - 0, 0, 0, 0, 0}, + TRUE, 0, 0, 0, 0, 0}, {"innodb_page_size", OPT_INNODB_PAGE_SIZE, "The universal page size of the database.", (G_PTR*) &innobase_page_size, (G_PTR*) &innobase_page_size, 0, @@ -2019,6 +2011,12 @@ (G_PTR*)&srv_force_recovery, 0, GET_ULONG, OPT_ARG, 0, 0, SRV_FORCE_IGNORE_CORRUPT, 0, 0, 0}, + {"innodb_log_checkpoint_now", OPT_INNODB_CHECKPOINT, + "(for --backup): Force an InnoDB checkpoint", + (G_PTR*)&innodb_log_checkpoint_now, + (G_PTR*)&innodb_log_checkpoint_now, + 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, + {"mysqld-args", OPT_XTRA_MYSQLD_ARGS, "All arguments that follow this argument are considered as server " "options, and if some of them are not supported by mariabackup, they " @@ -2482,7 +2480,7 @@ } srv_sys_space.normalize_size(); - srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift); + srv_lock_table_size = 5 * buf_pool.curr_size(); /* -------------- Log files ---------------------------*/ @@ -2504,11 +2502,8 @@ srv_adaptive_flushing = FALSE; - /* We set srv_pool_size here in units of 1 kB. InnoDB internally - changes the value so that it becomes the number of database pages. */ - - srv_buf_pool_size = (ulint) xtrabackup_use_memory; - srv_buf_pool_chunk_unit = srv_buf_pool_size; + buf_pool.size_in_bytes_max = size_t(xtrabackup_use_memory); + buf_pool.size_in_bytes_requested = buf_pool.size_in_bytes_max; srv_n_read_io_threads = (uint) innobase_read_io_threads; srv_n_write_io_threads = (uint) innobase_write_io_threads; @@ -2534,12 +2529,8 @@ msg("InnoDB: Using Linux native AIO"); } #elif defined(HAVE_URING) - if (!srv_use_native_aio) { - } else if (io_uring_may_be_unsafe) { - msg("InnoDB: Using liburing on this kernel %s may cause hangs;" - " see https://jira.mariadb.org/browse/MDEV-26674", - io_uring_may_be_unsafe); - } else { + + if (srv_use_native_aio) { msg("InnoDB: Using liburing"); } #else @@ -2679,7 +2670,7 @@ } recv_sys.lsn= log_sys.next_checkpoint_lsn= - log_sys.get_lsn() - SIZE_OF_FILE_CHECKPOINT; + log_get_lsn() - SIZE_OF_FILE_CHECKPOINT; log_sys.set_latest_format(false); // not encrypted log_hdr_init(); byte *b= &log_hdr_buf[log_t::START_OFFSET]; @@ -2946,6 +2937,15 @@ const regex_list_t& list, const char* name) { + if (list.empty()) return (FALSE); + + /* + regexec/pcre2_regexec is not threadsafe, also documented. + Serialize access from multiple threads to compiled regexes. 
+ */ + static std::mutex regex_match_mutex; + std::lock_guard lock(regex_match_mutex); + regmatch_t tables_regmatch[1]; for (regex_list_t::const_iterator i = list.begin(), end = list.end(); i != end; ++i) { @@ -5405,6 +5405,14 @@ } msg("cd to %s", mysql_real_data_home); encryption_plugin_backup_init(mysql_connection); + if (innodb_log_checkpoint_now != false && mysql_send_query( + mysql_connection, + C_STRING_WITH_LEN("SET GLOBAL " + "innodb_log_checkpoint_now=ON;"))) { + msg("initiating checkpoint failed"); + return(false); + } + msg("open files limit requested %lu, set to %lu", xb_open_files_limit, xb_set_max_open_files(xb_open_files_limit)); @@ -5517,6 +5525,11 @@ goto fail; } + /* try to wait for a log checkpoint, but do not fail if the + server does not support this */ + if (innodb_log_checkpoint_now != false) { + mysql_read_query_result(mysql_connection); + } /* label it */ recv_sys.file_checkpoint = log_sys.next_checkpoint_lsn; log_hdr_init(); @@ -6230,9 +6243,22 @@ buf + FSP_HEADER_OFFSET + FSP_SIZE); if (mach_read_from_4(buf + FIL_PAGE_SPACE_ID)) { +#ifdef _WIN32 + os_offset_t last_page = + os_file_get_size(dst_file) / + page_size; + + /* os_file_set_size() would + shrink the size of the file */ + if (last_page < n_pages && + !os_file_set_size( + dst_path, dst_file, + n_pages * page_size)) +#else if (!os_file_set_size( dst_path, dst_file, n_pages * page_size)) +#endif /* _WIN32 */ goto error; } else if (fil_space_t* space = fil_system.sys_space) { diff -Nru mariadb-10.11.11/include/json_lib.h mariadb-10.11.13/include/json_lib.h --- mariadb-10.11.11/include/json_lib.h 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/include/json_lib.h 2025-05-19 16:14:24.000000000 +0000 @@ -387,7 +387,7 @@ Returns negative integer in the case of an error, the length of the result otherwise. */ -int json_unescape(CHARSET_INFO *json_cs, +int __attribute__((warn_unused_result)) json_unescape(CHARSET_INFO *json_cs, const uchar *json_str, const uchar *json_end, CHARSET_INFO *res_cs, uchar *res, uchar *res_end); @@ -401,7 +401,8 @@ JSON_ERROR_OUT_OF_SPACE Not enough space in the provided buffer JSON_ERROR_ILLEGAL_SYMBOL Source symbol cannot be represented in JSON */ -int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end, +int __attribute__((warn_unused_result)) json_escape(CHARSET_INFO *str_cs, + const uchar *str, const uchar *str_end, CHARSET_INFO *json_cs, uchar *json, uchar *json_end); diff -Nru mariadb-10.11.11/include/my_base.h mariadb-10.11.13/include/my_base.h --- mariadb-10.11.11/include/my_base.h 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/include/my_base.h 2025-05-19 16:14:24.000000000 +0000 @@ -219,7 +219,10 @@ /** Start writing rows during ALTER TABLE...ALGORITHM=COPY. */ HA_EXTRA_BEGIN_ALTER_COPY, /** Finish writing rows during ALTER TABLE...ALGORITHM=COPY. 
*/ - HA_EXTRA_END_ALTER_COPY + HA_EXTRA_END_ALTER_COPY, + /** Abort of writing rows during ALTER TABLE..ALGORITHM=COPY or + CREATE..SELCT */ + HA_EXTRA_ABORT_ALTER_COPY }; /* Compatible option, to be deleted in 6.0 */ diff -Nru mariadb-10.11.11/include/my_cpu.h mariadb-10.11.13/include/my_cpu.h --- mariadb-10.11.11/include/my_cpu.h 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/include/my_cpu.h 2025-05-19 16:14:24.000000000 +0000 @@ -97,7 +97,12 @@ /* Changed from __ppc_get_timebase for musl and clang compatibility */ __builtin_ppc_get_timebase(); #elif defined __GNUC__ && defined __riscv - __builtin_riscv_pause(); + /* The GCC-only __builtin_riscv_pause() or the pause instruction is + encoded like a fence instruction with special parameters. On RISC-V + implementations that do not support arch=+zihintpause this + instruction could be interpreted as a more expensive memory fence; + it should not be an illegal instruction. */ + __asm__ volatile(".long 0x0100000f" ::: "memory"); #elif defined __GNUC__ /* Mainly, prevent the compiler from optimizing away delay loops */ __asm__ __volatile__ ("":::"memory"); diff -Nru mariadb-10.11.11/include/my_stack_alloc.h mariadb-10.11.13/include/my_stack_alloc.h --- mariadb-10.11.11/include/my_stack_alloc.h 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/include/my_stack_alloc.h 2025-05-19 16:14:24.000000000 +0000 @@ -38,6 +38,8 @@ #if defined(__GNUC__) || defined(__clang__) /* GCC and Clang compilers */ #if defined(__i386__) /* Intel x86 (32-bit) */ __asm__ volatile ("movl %%esp, %0" : "=r" (stack_ptr)); +#elif defined(__x86_64__) && defined (__ILP32__) /* Intel x86-64 (64-bit), X32 ABI */ + __asm__ volatile ("movl %%esp, %0" : "=r" (stack_ptr)); #elif defined(__x86_64__) /* Intel x86-64 (64-bit) */ __asm__ volatile ("movq %%rsp, %0" : "=r" (stack_ptr)); #elif defined(__powerpc__) /* PowerPC (32-bit) */ diff -Nru mariadb-10.11.11/include/my_sys.h mariadb-10.11.13/include/my_sys.h --- mariadb-10.11.11/include/my_sys.h 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/include/my_sys.h 2025-05-19 16:14:24.000000000 +0000 @@ -173,9 +173,15 @@ extern void *my_memdup(PSI_memory_key key, const void *from,size_t length,myf MyFlags); extern char *my_strdup(PSI_memory_key key, const char *from,myf MyFlags); extern char *my_strndup(PSI_memory_key key, const char *from, size_t length, myf MyFlags); +extern my_bool my_use_large_pages; -int my_init_large_pages(my_bool super_large_pages); +int my_init_large_pages(void); uchar *my_large_malloc(size_t *size, myf my_flags); +#ifdef _WIN32 +/* On Windows, use my_virtual_mem_reserve() and my_virtual_mem_commit(). */ +#else +char *my_large_virtual_alloc(size_t *size); +#endif void my_large_free(void *ptr, size_t size); void my_large_page_truncate(size_t *size); diff -Nru mariadb-10.11.11/include/my_virtual_mem.h mariadb-10.11.13/include/my_virtual_mem.h --- mariadb-10.11.11/include/my_virtual_mem.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/include/my_virtual_mem.h 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,37 @@ +/* Copyright (c) 2025, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#pragma once
+/*
+  Functionality for handling virtual memory
+  (reserve, commit, decommit, release)
+*/
+#include <stddef.h> /*size_t*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+# ifdef _WIN32
+char *my_virtual_mem_reserve(size_t *size);
+# endif
+char *my_virtual_mem_commit(char *ptr, size_t size);
+void my_virtual_mem_decommit(char *ptr, size_t size);
+void my_virtual_mem_release(char *ptr, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
diff -Nru mariadb-10.11.11/include/source_revision.h mariadb-10.11.13/include/source_revision.h
--- mariadb-10.11.11/include/source_revision.h	2025-01-30 11:01:27.000000000 +0000
+++ mariadb-10.11.13/include/source_revision.h	2025-05-19 16:14:28.000000000 +0000
@@ -1 +1 @@
-#define SOURCE_REVISION "e69f8cae1a15e15b9e4f5e0f8497e1f17bdc81a4"
+#define SOURCE_REVISION "8fb09426b98583916ccfd4f8c49741adc115bac3"
diff -Nru mariadb-10.11.11/include/sslopt-longopts.h mariadb-10.11.13/include/sslopt-longopts.h
--- mariadb-10.11.11/include/sslopt-longopts.h	2025-01-30 11:01:23.000000000 +0000
+++ mariadb-10.11.13/include/sslopt-longopts.h	2025-05-19 16:14:24.000000000 +0000
@@ -51,8 +51,7 @@
 
 #ifdef MYSQL_CLIENT
   {"ssl-verify-server-cert", 0,
-   "Verify server's \"Common Name\" in its cert against hostname used "
-   "when connecting. This option is disabled by default.",
+   "Verify server's certificate to prevent man-in-the-middle attacks",
   &opt_ssl_verify_server_cert, &opt_ssl_verify_server_cert,
   0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
 #endif
diff -Nru mariadb-10.11.11/libmariadb/CMakeLists.txt mariadb-10.11.13/libmariadb/CMakeLists.txt
--- mariadb-10.11.11/libmariadb/CMakeLists.txt	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/libmariadb/CMakeLists.txt	2025-05-19 16:14:27.000000000 +0000
@@ -52,7 +52,7 @@
 
 SET(CPACK_PACKAGE_VERSION_MAJOR 3)
 SET(CPACK_PACKAGE_VERSION_MINOR 3)
-SET(CPACK_PACKAGE_VERSION_PATCH 14)
+SET(CPACK_PACKAGE_VERSION_PATCH 16)
 SET(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
 MATH(EXPR MARIADB_PACKAGE_VERSION_ID "${CPACK_PACKAGE_VERSION_MAJOR} * 10000 +
                                       ${CPACK_PACKAGE_VERSION_MINOR} * 100 +
diff -Nru mariadb-10.11.11/libmariadb/include/errmsg.h mariadb-10.11.13/libmariadb/include/errmsg.h
--- mariadb-10.11.11/libmariadb/include/errmsg.h	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/libmariadb/include/errmsg.h	2025-05-19 16:14:27.000000000 +0000
@@ -115,10 +115,11 @@
 #define CR_BINLOG_INVALID_FILE 5022
 #define CR_BINLOG_SEMI_SYNC_ERROR 5023
 #define CR_INVALID_CLIENT_FLAG 5024
+#define CR_ERR_MISSING_ERROR_INFO 5026
 
 /* Always last, if you add new error codes please update the value
    for CR_MARIADB_LAST_ERROR */
-#define CR_MARIADB_LAST_ERROR CR_INVALID_CLIENT_FLAG
+#define CR_MARIADB_LAST_ERROR CR_ERR_MISSING_ERROR_INFO
 
 #endif
diff -Nru mariadb-10.11.11/libmariadb/include/ma_context.h mariadb-10.11.13/libmariadb/include/ma_context.h
--- mariadb-10.11.11/libmariadb/include/ma_context.h	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/libmariadb/include/ma_context.h	2025-05-19 16:14:27.000000000 +0000
@@ -26,8 +26,33 @@
   (This particular implementation uses Posix ucontext swapcontext().)
*/ + +/* + When running with address sanitizer, the stack switching can cause confusion + unless the __sanitizer_{start,finish}_switch_fiber() functions are used + (CONC-618). + + In this case prefer the use of boost::context or ucontext, which should have + this instrumentation, over our custom assembler variants. +*/ +#ifdef __has_feature + /* Clang */ +# if __has_feature(address_sanitizer) +# define ASAN_PREFER_NON_ASM 1 +# endif +#else + /* GCC */ +# ifdef __SANITIZE_ADDRESS__ +# define ASAN_PREFER_NON_ASM 1 +# endif +#endif + #ifdef _WIN32 #define MY_CONTEXT_USE_WIN32_FIBERS 1 +#elif defined(ASAN_PREFER_NON_ASM) && defined(HAVE_BOOST_CONTEXT_H) +#define MY_CONTEXT_USE_BOOST_CONTEXT +#elif defined(ASAN_PREFER_NON_ASM) && defined(HAVE_UCONTEXT_H) +#define MY_CONTEXT_USE_UCONTEXT #elif defined(__GNUC__) && __GNUC__ >= 3 && defined(__x86_64__) && !defined(__ILP32__) #define MY_CONTEXT_USE_X86_64_GCC_ASM #elif defined(__GNUC__) && __GNUC__ >= 3 && defined(__i386__) diff -Nru mariadb-10.11.11/libmariadb/include/mariadb_com.h mariadb-10.11.13/libmariadb/include/mariadb_com.h --- mariadb-10.11.11/libmariadb/include/mariadb_com.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/include/mariadb_com.h 2025-05-19 16:14:27.000000000 +0000 @@ -423,6 +423,28 @@ double max_value_dbl; }; + /* The following is for user defined functions */ + +typedef struct st_udf_args +{ + unsigned int arg_count; /* Number of arguments */ + enum Item_result *arg_type; /* Pointer to item_results */ + char **args; /* Pointer to argument */ + unsigned long *lengths; /* Length of string arguments */ + char *maybe_null; /* Set to 1 for all maybe_null args */ +} UDF_ARGS; + + /* This holds information about the result */ + +typedef struct st_udf_init +{ + my_bool maybe_null; /* 1 if function can return NULL */ + unsigned int decimals; /* for real functions */ + unsigned int max_length; /* For string functions */ + char *ptr; /* free pointer for function data */ + my_bool const_item; /* 0 if result is independent of arguments */ +} UDF_INIT; + /* Connection types */ #define MARIADB_CONNECTION_UNIXSOCKET 0 #define MARIADB_CONNECTION_TCP 1 diff -Nru mariadb-10.11.11/libmariadb/libmariadb/CMakeLists.txt mariadb-10.11.13/libmariadb/libmariadb/CMakeLists.txt --- mariadb-10.11.11/libmariadb/libmariadb/CMakeLists.txt 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/libmariadb/CMakeLists.txt 2025-05-19 16:14:27.000000000 +0000 @@ -168,12 +168,6 @@ mysql_use_result mysql_warning_count) -# some gcc versions fail to compile asm parts of my_context.c, -# if build type is "Release" (see CONC-133), so we need to add -g flag -IF(CMAKE_COMPILER_IS_GNUCC AND CMAKE_BUILD_TYPE MATCHES "Release") - SET_SOURCE_FILES_PROPERTIES(my_context.c PROPERTIES COMPILE_FLAGS -g) -ENDIF() - IF(ZLIB_FOUND AND WITH_EXTERNAL_ZLIB) INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) ELSE() diff -Nru mariadb-10.11.11/libmariadb/libmariadb/ma_context.c mariadb-10.11.13/libmariadb/libmariadb/ma_context.c --- mariadb-10.11.11/libmariadb/libmariadb/ma_context.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/libmariadb/ma_context.c 2025-05-19 16:14:27.000000000 +0000 @@ -105,9 +105,23 @@ c->user_func= f; c->user_data= d; c->active= 1; + u.a[1]= 0; /* Otherwise can give uninitialized warnings on 32-bit. */ u.p= c; + /* + makecontext function expects function pointer to receive multiple + ints as an arguments, however is declared in ucontext.h header with + a void (empty) argument list. 
Ignore clang cast-function-type-strict + warning for this function call. + */ +# ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wcast-function-type-strict" +# endif makecontext(&c->spawned_context, (uc_func_t)my_context_spawn_internal, 2, u.a[0], u.a[1]); +# ifdef __clang__ +# pragma clang diagnostic pop +# endif return my_context_continue(c); } @@ -204,7 +218,7 @@ ( "movq %%rsp, (%[save])\n\t" "movq %[stack], %%rsp\n\t" -#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __clang__) && !defined(__INTEL_COMPILER) +#if defined(__GCC_HAVE_DWARF2_CFI_ASM) || (defined(__clang__) && __clang_major__ < 13) /* This emits a DWARF DW_CFA_undefined directive to make the return address undefined. This indicates that this is the top of the stack frame, and @@ -440,7 +454,7 @@ ( "movl %%esp, (%[save])\n\t" "movl %[stack], %%esp\n\t" -#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __clang__) && !defined(__INTEL_COMPILER) +#if defined(__GCC_HAVE_DWARF2_CFI_ASM) || (defined(__clang__) && __clang_major__ < 13) /* This emits a DWARF DW_CFA_undefined directive to make the return address undefined. This indicates that this is the top of the stack frame, and @@ -675,7 +689,7 @@ ( "mov x10, sp\n\t" "mov sp, %[stack]\n\t" -#if !defined(__INTEL_COMPILER) +#if defined(__GCC_HAVE_DWARF2_CFI_ASM) || (defined(__clang__) && __clang_major__ < 13) /* This emits a DWARF DW_CFA_undefined directive to make the return address (UNW_AARCH64_X30) undefined. This indicates that this is the top of the @@ -724,7 +738,11 @@ [stack] "+r" (stack) : [save] "r" (save) : "x3", "x4", "x5", "x6", "x7", - "x9", "x10", "x11", "x14", "x15", "x18", "x30", + "x9", "x10", "x11", "x14", "x15", +#if defined(__linux__) && !defined(__ANDROID__) + "x18", +#endif + "x30", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", @@ -827,7 +845,11 @@ : [ret] "=r" (ret) : [save] "r" (save) : "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x18", "x30", + "x9", "x10", "x11", "x12", "x13", "x14", "x15", +#if defined(__linux__) && !defined(__ANDROID__) + "x18", +#endif + "x30", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", @@ -904,7 +926,11 @@ : : [save] "r" (save) : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x18", "x30", + "x9", "x10", "x11", "x12", "x13", "x14", "x15", +#if defined(__linux__) && !defined(__ANDROID__) + "x18", +#endif + "x30", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", diff -Nru mariadb-10.11.11/libmariadb/libmariadb/ma_errmsg.c mariadb-10.11.13/libmariadb/libmariadb/ma_errmsg.c --- mariadb-10.11.11/libmariadb/libmariadb/ma_errmsg.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/libmariadb/ma_errmsg.c 2025-05-19 16:14:27.000000000 +0000 @@ -119,6 +119,8 @@ /* 5022 */ "File '%s' is not a binary log file", /* 5023 */ "Semi sync request error: %s", /* 5024 */ "Invalid client flags (%lu) specified. 
Supported flags: %lu", + /* 5025 */ "", + /* 5026 */ "Server returned an error packet without further information", "" }; diff -Nru mariadb-10.11.11/libmariadb/libmariadb/mariadb_lib.c mariadb-10.11.13/libmariadb/libmariadb/mariadb_lib.c --- mariadb-10.11.11/libmariadb/libmariadb/mariadb_lib.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/libmariadb/mariadb_lib.c 2025-05-19 16:14:27.000000000 +0000 @@ -81,7 +81,7 @@ #define strncasecmp _strnicmp #endif -#define ASYNC_CONTEXT_DEFAULT_STACK_SIZE (4096*15) +#define ASYNC_CONTEXT_DEFAULT_STACK_SIZE (256*1024) #define MA_RPL_VERSION_HACK "5.5.5-" #define CHARSET_NAME_LEN 64 @@ -274,6 +274,11 @@ ma_strmake(net->last_error,(char*) pos, min(len,sizeof(net->last_error)-1)); } + /* MDEV-35935: if server sends error packet without error, we have to + set error manually */ + if (!net->last_errno) { + my_set_error(mysql, CR_ERR_MISSING_ERROR_INFO, SQLSTATE_UNKNOWN, 0); + } } else { @@ -402,7 +407,7 @@ /* CONC-589: If reconnect option was specified, we have to check if the connection (socket) is still available */ - if (command != COM_QUIT && mysql->options.reconnect && ma_pvio_is_alive(mysql->net.pvio)) + if (command != COM_QUIT && mysql->options.reconnect && !ma_pvio_is_alive(mysql->net.pvio)) { ma_pvio_close(mysql->net.pvio); mysql->net.pvio= NULL; diff -Nru mariadb-10.11.11/libmariadb/libmariadb/mariadb_stmt.c mariadb-10.11.13/libmariadb/libmariadb/mariadb_stmt.c --- mariadb-10.11.11/libmariadb/libmariadb/mariadb_stmt.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/libmariadb/mariadb_stmt.c 2025-05-19 16:14:27.000000000 +0000 @@ -425,6 +425,9 @@ stmt->bind[i].is_null= &stmt->bind[i].is_null_value; *stmt->bind[i].is_null= 1; stmt->bind[i].u.row_ptr= NULL; + if (!stmt->bind[i].length) + stmt->bind[i].length= &stmt->bind[i].length_value; + *stmt->bind[i].length= stmt->bind[i].length_value= 0; } } else { @@ -437,6 +440,9 @@ if (stmt->result_callback) stmt->result_callback(stmt->user_data, i, &row); else { + if (!stmt->bind[i].is_null) + stmt->bind[i].is_null= &stmt->bind[i].is_null_value; + *stmt->bind[i].is_null= 0; if (mysql_ps_fetch_functions[stmt->fields[i].type].pack_len >= 0) length= mysql_ps_fetch_functions[stmt->fields[i].type].pack_len; else diff -Nru mariadb-10.11.11/libmariadb/plugins/pvio/pvio_socket.c mariadb-10.11.13/libmariadb/plugins/pvio/pvio_socket.c --- mariadb-10.11.11/libmariadb/plugins/pvio/pvio_socket.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/plugins/pvio/pvio_socket.c 2025-05-19 16:14:27.000000000 +0000 @@ -1101,10 +1101,10 @@ res= poll(&poll_fd, 1, 0); if (res <= 0) /* timeout or error */ - return FALSE; + return TRUE; if (!(poll_fd.revents & (POLLIN | POLLPRI))) - return FALSE; - return TRUE; + return TRUE; + return FALSE; #else /* We can't use the WSAPoll function, it's broken :-( (see Windows 8 Bugs 309411 - WSAPoll does not report failed connections) @@ -1117,8 +1117,8 @@ res= select((int)csock->socket + 1, &sfds, NULL, NULL, &tv); if (res > 0 && FD_ISSET(csock->socket, &sfds)) - return TRUE; - return FALSE; + return FALSE; + return TRUE; #endif } /* }}} */ diff -Nru mariadb-10.11.11/libmariadb/unittest/libmariadb/connection.c mariadb-10.11.13/libmariadb/unittest/libmariadb/connection.c --- mariadb-10.11.11/libmariadb/unittest/libmariadb/connection.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/unittest/libmariadb/connection.c 2025-05-19 16:14:27.000000000 +0000 @@ -2339,6 +2339,7 @@ MYSQL *mysql; int i; const char *ciphers[3]= 
{"TLS_AES_128_GCM_SHA256", "TLS_AES_256_GCM_SHA384", "TLS_CHACHA20_POLY1305_SHA256"}; + my_bool verify= 0; SKIP_MAXSCALE; @@ -2348,6 +2349,7 @@ mysql= mysql_init(NULL); mysql_ssl_set(mysql, NULL, NULL, NULL, NULL, NULL); + mysql_optionsv(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT, &verify); mysql_optionsv(mysql, MYSQL_OPT_SSL_CIPHER, ciphers[i]); if (!my_test_connect(mysql, hostname, username, @@ -2370,7 +2372,6 @@ static int test_conc589(MYSQL *my) { MYSQL *mysql= mysql_init(NULL); - MYSQL_RES *result; int rc; my_bool reconnect= 1, verify= 0; unsigned long last_thread_id= 0; @@ -2391,15 +2392,85 @@ check_mysql_rc(rc, mysql); last_thread_id= mysql_thread_id(mysql); + rc= mysql_query(mysql, "SET @a:=1"); + check_mysql_rc(rc, mysql); + + sleep(10); + + rc= mysql_query(mysql, "SET @a:=2"); + check_mysql_rc(rc, mysql); + FAIL_IF(mysql_thread_id(mysql) == last_thread_id, "Expected new connection id"); + last_thread_id= mysql_thread_id(mysql); + + mysql_kill(my, last_thread_id); + + sleep(10); + + rc= mysql_query(mysql, "SET @a:=3"); + check_mysql_rc(rc, mysql); + FAIL_IF(mysql_thread_id(mysql) == last_thread_id, "Expected new connection id"); + mysql_close(mysql); + return OK; +} + +#ifdef WIN32 +static int test_conc760(MYSQL *my) +{ + MYSQL *mysql= mysql_init(NULL); + MYSQL_RES *result; + MYSQL_ROW row; + int rc; + char named_pipe_name[128]; + my_bool reconnect= 1, verify= 0; + unsigned long last_thread_id= 0; + unsigned int protocol= MYSQL_PROTOCOL_PIPE; + my_bool have_named_pipe= 0; + + SKIP_MAXSCALE; + + rc= mysql_query(my, "select @@named_pipe, @@socket"); + check_mysql_rc(rc, mysql); + + if ((result= mysql_store_result(my))) + { + if((row= mysql_fetch_row(result))) + have_named_pipe= atoi(row[0]); + strncpy(named_pipe_name, row[1], sizeof(named_pipe_name)-1); + named_pipe_name[sizeof(named_pipe_name)-1]= '\0'; + mysql_free_result(result); + } + + if (!have_named_pipe) + { + diag("Server doesn't support named pipes"); + return SKIP; + } + + mysql_options(mysql, MYSQL_OPT_RECONNECT, &reconnect); + mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT, &verify); + mysql_options(mysql, MYSQL_OPT_PROTOCOL, &protocol); + + if (!my_test_connect(mysql, hostname, username, + password, schema, port, named_pipe_name, CLIENT_REMEMBER_OPTIONS)) + { + diag("error: %s", mysql_error(mysql)); + return FAIL; + } + + rc= mysql_query(mysql, "SET SESSION wait_timeout=5"); + check_mysql_rc(rc, mysql); + + last_thread_id= mysql_thread_id(mysql); if ((rc= mysql_query(mysql, "SELECT 1")) || (result= mysql_store_result(mysql)) == NULL) check_mysql_rc(rc, mysql); mysql_free_result(result); sleep(10); - if ((rc= mysql_query(mysql, "SELECT 2")) || (result= mysql_store_result(mysql)) == NULL) - check_mysql_rc(rc, mysql); - mysql_free_result(result); + rc= mysql_query(mysql, "SELECT 2"); + check_mysql_rc(rc, mysql); + if (result= mysql_store_result(mysql)) + mysql_free_result(result); FAIL_IF(mysql_thread_id(mysql) == last_thread_id, "Expected new connection id"); last_thread_id= mysql_thread_id(mysql); @@ -2414,8 +2485,12 @@ mysql_close(mysql); return OK; } +#endif struct my_tests_st my_tests[] = { +#ifdef WIN32 + {"test_conc760", test_conc760, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, +#endif {"test_conc589", test_conc589, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, #ifdef HAVE_test_conc748 {"test_conc748", test_conc748, TEST_CONNECTION_NONE, 0, NULL, NULL}, diff -Nru mariadb-10.11.11/libmariadb/unittest/libmariadb/errors.c mariadb-10.11.13/libmariadb/unittest/libmariadb/errors.c --- 
mariadb-10.11.11/libmariadb/unittest/libmariadb/errors.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/unittest/libmariadb/errors.c 2025-05-19 16:14:27.000000000 +0000 @@ -272,8 +272,82 @@ return OK; } +#define TEST_ARRAY_SIZE 1024 + +static int test_mdev35935(MYSQL *mysql) +{ + MYSQL_STMT *stmt= mysql_stmt_init(mysql); + const char *stmt_str= "INSERT INTO bulk1 (a,b) VALUES (?,?)"; + unsigned int array_size= TEST_ARRAY_SIZE; + int rc; + unsigned int i; + char **buffer; + unsigned long *lengths; + unsigned int *vals; + MYSQL_BIND bind[2]; + const char *data= "test"; + + SKIP_MAXSCALE; + SKIP_MYSQL(mysql); + + rc= mysql_select_db(mysql, schema); + + rc= mysql_query(mysql, "DROP TABLE IF EXISTS bulk1"); + check_mysql_rc(rc, mysql); + + rc= mysql_query(mysql, "CREATE TABLE bulk1 (a int , b VARCHAR(255))"); + check_mysql_rc(rc, mysql); + + rc= mysql_stmt_prepare(stmt, SL(stmt_str)); + check_stmt_rc(rc, stmt); + + rc= mysql_query(mysql, "ALTER TABLE bulk1 ADD c int"); + check_mysql_rc(rc, mysql); + + /* allocate memory */ + buffer= calloc(TEST_ARRAY_SIZE, sizeof(char *)); + lengths= calloc(TEST_ARRAY_SIZE, sizeof *lengths); + vals= calloc(TEST_ARRAY_SIZE, sizeof *vals); + + for (i=0; i < TEST_ARRAY_SIZE; i++) + { + buffer[i]= (void *)data; + lengths[i]= -1; + vals[i]= i; + } + + memset(bind, 0, sizeof(MYSQL_BIND) * 2); + bind[0].buffer_type= MYSQL_TYPE_LONG; + bind[0].buffer= vals; + bind[1].buffer_type= MYSQL_TYPE_STRING; + bind[1].buffer= (void *)buffer; + bind[1].length= (unsigned long *)lengths; + + rc= mysql_stmt_attr_set(stmt, STMT_ATTR_ARRAY_SIZE, &array_size); + check_stmt_rc(rc, stmt); + + rc= mysql_stmt_bind_param(stmt, bind); + check_stmt_rc(rc, stmt); + + if ((rc= mysql_stmt_execute(stmt))) + { + FAIL_IF((!mysql_stmt_errno(stmt) || !mysql_errno(mysql)), "Error number > 0 expected"); + } + + mysql_stmt_close(stmt); + rc= mysql_query(mysql, "DROP TABLE IF EXISTS bulk1"); + check_mysql_rc(rc, mysql); + + free(buffer); + free(lengths); + free(vals); + return OK; +} + + struct my_tests_st my_tests[] = { + {"test_mdev35935", test_mdev35935, TEST_CONNECTION_DEFAULT, 0, NULL , NULL}, {"test_client_warnings", test_client_warnings, TEST_CONNECTION_DEFAULT, 0, NULL , NULL}, {"test_ps_client_warnings", test_ps_client_warnings, TEST_CONNECTION_DEFAULT, 0, NULL , NULL}, {"test_server_warnings", test_server_warnings, TEST_CONNECTION_DEFAULT, 0, NULL , NULL}, diff -Nru mariadb-10.11.11/libmariadb/unittest/libmariadb/ps_bugs.c mariadb-10.11.13/libmariadb/unittest/libmariadb/ps_bugs.c --- mariadb-10.11.11/libmariadb/unittest/libmariadb/ps_bugs.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/libmariadb/unittest/libmariadb/ps_bugs.c 2025-05-19 16:14:27.000000000 +0000 @@ -5001,7 +5001,7 @@ for (i=0; i < 10; i++, frac=frac*10+i) { - unsigned long expected= 0; + unsigned int expected= frac; sprintf(query, "SELECT '2018-11-05 22:25:59.%ld'", frac); diag("%d: %s", i, query); @@ -5027,11 +5027,15 @@ diag("second_part: %ld", tm.second_part); - expected= i > 6 ? 
123456 : frac * (unsigned int)powl(10, (6 - i)); + while (expected && expected < 100000) + expected *= 10; + while (expected >= 1000000) + expected /= 10; if (tm.second_part != expected) { - diag("Error: tm.second_part=%ld expected=%ld", tm.second_part, expected); + diag("Error: tm.second_part=%ld expected=%d", tm.second_part, expected); + mysql_stmt_close(stmt); return FAIL; } } @@ -5618,6 +5622,7 @@ rc= mysql_stmt_attr_set(stmt, STMT_ATTR_CB_PARAM, conc623_param_callback); check_stmt_rc(rc, stmt); + memset(&bind, 0, sizeof(MYSQL_BIND)); bind.buffer_type= MYSQL_TYPE_LONG; rc= mysql_stmt_bind_param(stmt, &bind); check_stmt_rc(rc, stmt); @@ -5910,9 +5915,50 @@ return OK; } +static int test_conc762(MYSQL *mysql) +{ + int rc; + MYSQL_STMT *stmt= mysql_stmt_init(mysql); + MYSQL_BIND bind[2]; + my_bool is_null[2]= {1,1}; + unsigned long length[2]= {1,1}; + + rc= mysql_stmt_prepare(stmt, SL("SELECT NULL, 'foo'")); + check_stmt_rc(rc, stmt); + + memset(&bind, 0, sizeof(MYSQL_BIND) * 2); + + bind[0].buffer_type = MYSQL_TYPE_STRING; + bind[1].buffer_type = MYSQL_TYPE_STRING; + bind[0].is_null= &is_null[0]; + bind[1].is_null= &is_null[1]; + bind[0].buffer_length= bind[1].buffer_length= 0; + bind[0].length= &length[0]; + bind[1].length= &length[1]; + + rc= mysql_stmt_execute(stmt); + check_stmt_rc(rc, stmt); + + rc= mysql_stmt_bind_result(stmt, bind); + + mysql_stmt_fetch(stmt); + FAIL_IF(is_null[0]==0, "Expected NULL value"); + FAIL_IF(is_null[1]==1, "Expected non NULL value"); + FAIL_IF(length[0]!=0, "Expected length=0"); + FAIL_IF(length[1]!=3, "Expected length=3"); + +// FAIL_IF(length[0] != 0, "Expected length=0"); + +//FAIL_IF(length[1] != 3, "Expected length=3)"; + + mysql_stmt_close(stmt); + return OK; +} + struct my_tests_st my_tests[] = { {"test_conc702", test_conc702, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, + {"test_conc762", test_conc762, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, {"test_conc176", test_conc176, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, {"test_conc739", test_conc739, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, {"test_conc633", test_conc633, TEST_CONNECTION_DEFAULT, 0, NULL, NULL}, diff -Nru mariadb-10.11.11/mysql-test/CMakeLists.txt mariadb-10.11.13/mysql-test/CMakeLists.txt --- mariadb-10.11.11/mysql-test/CMakeLists.txt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/CMakeLists.txt 2025-05-19 16:14:24.000000000 +0000 @@ -14,7 +14,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA -INSTALL_MYSQL_TEST("." ".") +INSTALL_MYSQL_TEST("." "") IF(NOT ${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) # Enable running mtr from build directory diff -Nru mariadb-10.11.11/mysql-test/include/long_test.inc mariadb-10.11.13/mysql-test/include/long_test.inc --- mariadb-10.11.11/mysql-test/include/long_test.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/include/long_test.inc 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ # We use this --source include to mark a test as taking long to run. # We can use this to schedule such test early (to not be left with -# only one or two long tests running, and rests of works idle), or to +# only one or two long tests running, and rests of workers idle), or to # run a quick test skipping long-running test cases. 
--source include/no_valgrind_without_big.inc diff -Nru mariadb-10.11.11/mysql-test/lib/My/SafeProcess/safe_process.cc mariadb-10.11.13/mysql-test/lib/My/SafeProcess/safe_process.cc --- mariadb-10.11.11/mysql-test/lib/My/SafeProcess/safe_process.cc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/lib/My/SafeProcess/safe_process.cc 2025-05-19 16:14:24.000000000 +0000 @@ -220,6 +220,7 @@ pid_t own_pid= getpid(); pid_t parent_pid= getppid(); bool nocore = false; + int open_files_limit = 1024; struct sigaction sa,sa_abort; sa.sa_handler= handle_signal; @@ -268,7 +269,14 @@ } else if ( strncmp (arg, "--env ", 6) == 0 ) { - putenv(strdup(arg+6)); + putenv(strdup(arg+6)); + } + else if ( strncmp(arg, "--open-files-limit=", 19) == 0 ) + { + const char* start = arg + 19; + open_files_limit = atoi(start); + if (open_files_limit <= 0) + die("Invalid value '%s' passed to --open-files-limit", start); } else die("Unknown option: %s", arg); @@ -318,11 +326,8 @@ if (nocore) setlimit(RLIMIT_CORE, 0, 0); - /* - mysqld defaults depend on that. make test results stable and independent - from the environment - */ - setlimit(RLIMIT_NOFILE, 1024, 1024); + // Set open files limit + setlimit(RLIMIT_NOFILE, open_files_limit, open_files_limit); // Signal that child is ready buf= 37; diff -Nru mariadb-10.11.11/mysql-test/lib/My/SafeProcess.pm mariadb-10.11.13/mysql-test/lib/My/SafeProcess.pm --- mariadb-10.11.11/mysql-test/lib/My/SafeProcess.pm 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/lib/My/SafeProcess.pm 2025-05-19 16:14:24.000000000 +0000 @@ -138,6 +138,7 @@ my $error = delete($opts{'error'}); my $verbose = delete($opts{'verbose'}) || $::opt_verbose; my $nocore = delete($opts{'nocore'}); + my $open_files_limit = delete($opts{'open_files_limit'}); my $host = delete($opts{'host'}); my $shutdown = delete($opts{'shutdown'}); my $user_data= delete($opts{'user_data'}); @@ -161,6 +162,8 @@ push(@safe_args, "--verbose") if $verbose > 0; push(@safe_args, "--nocore") if $nocore; + push(@safe_args, "--open-files-limit=$open_files_limit") if $open_files_limit; + # Point the safe_process at the right parent if running on cygwin push(@safe_args, "--parent-pid=".Cygwin::pid_to_winpid($$)) if IS_CYGWIN; diff -Nru mariadb-10.11.11/mysql-test/main/backup_locks.test mariadb-10.11.13/mysql-test/main/backup_locks.test --- mariadb-10.11.11/mysql-test/main/backup_locks.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/backup_locks.test 2025-05-19 16:14:24.000000000 +0000 @@ -2,6 +2,7 @@ # Tests BACKUP STAGE locking ######################################################################## +--source include/long_test.inc --source include/have_innodb.inc --source include/have_metadata_lock_info.inc --source include/not_embedded.inc diff -Nru mariadb-10.11.11/mysql-test/main/comment_database.result mariadb-10.11.13/mysql-test/main/comment_database.result --- mariadb-10.11.11/mysql-test/main/comment_database.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/comment_database.result 2025-05-19 16:14:24.000000000 +0000 @@ -76,3 +76,16 @@ CATALOG_NAME SCHEMA_NAME DEFAULT_CHARACTER_SET_NAME DEFAULT_COLLATION_NAME SQL_PATH SCHEMA_COMMENT def comment latin2 latin2_general_ci NULL comment DROP DATABASE comment; +CREATE DATABASE db1; +# restart +SHOW CREATE DATABASE db1; +Database Create Database +db1 CREATE DATABASE `db1` /*!40100 DEFAULT CHARACTER SET latin1 COLLATE latin1_swedish_ci */ +Warnings: +Note 1105 Database 'db1' does not have a db.opt 
file. You can create one with ALTER DATABASE if needed +SHOW CREATE DATABASE db1; +Database Create Database +db1 CREATE DATABASE `db1` /*!40100 DEFAULT CHARACTER SET latin1 COLLATE latin1_swedish_ci */ +Warnings: +Note 1105 Database 'db1' does not have a db.opt file. You can create one with ALTER DATABASE if needed +DROP DATABASE db1; diff -Nru mariadb-10.11.11/mysql-test/main/comment_database.test mariadb-10.11.13/mysql-test/main/comment_database.test --- mariadb-10.11.11/mysql-test/main/comment_database.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/comment_database.test 2025-05-19 16:14:24.000000000 +0000 @@ -63,3 +63,11 @@ WHERE schema_name='comment'; DROP DATABASE comment; --enable_service_connection + +CREATE DATABASE db1; +--remove_file $MARIADB_DATADIR/db1/db.opt +--source include/restart_mysqld.inc +# We need to call this two times to ensure all code paths are used +SHOW CREATE DATABASE db1; +SHOW CREATE DATABASE db1; +DROP DATABASE db1; diff -Nru mariadb-10.11.11/mysql-test/main/ctype_utf8_def_upgrade.result mariadb-10.11.13/mysql-test/main/ctype_utf8_def_upgrade.result --- mariadb-10.11.11/mysql-test/main/ctype_utf8_def_upgrade.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/ctype_utf8_def_upgrade.result 2025-05-19 16:14:24.000000000 +0000 @@ -53,6 +53,8 @@ SHOW CREATE DATABASE db1; Database Create Database db1 CREATE DATABASE `db1` /*!40100 DEFAULT CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci */ +Warnings: +Note 1105 Database 'db1' does not have a db.opt file. You can create one with ALTER DATABASE if needed USE db1; SELECT @@character_set_database, 'taken from defaults' AS comment; @@character_set_database comment diff -Nru mariadb-10.11.11/mysql-test/main/derived_cond_pushdown.result mariadb-10.11.13/mysql-test/main/derived_cond_pushdown.result --- mariadb-10.11.11/mysql-test/main/derived_cond_pushdown.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/derived_cond_pushdown.result 2025-05-19 16:14:24.000000000 +0000 @@ -11761,9 +11761,8 @@ EXPLAIN INSERT INTO t1 SELECT * FROM ( SELECT t1.f FROM v1 JOIN t1 ) AS t WHERE f IS NOT NULL; id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY ALL NULL NULL NULL NULL 144 Using where -2 DERIVED ALL NULL NULL NULL NULL 12 -2 DERIVED t1 ALL NULL NULL NULL NULL 12 Using where; Using join buffer (flat, BNL join) +1 PRIMARY ALL NULL NULL NULL NULL 12 Using temporary +1 PRIMARY t1 ALL NULL NULL NULL NULL 12 Using where; Using join buffer (flat, BNL join) 4 DERIVED t1 ALL NULL NULL NULL NULL 12 EXPLAIN FORMAT=JSON INSERT INTO t1 SELECT * FROM ( SELECT t1.f FROM v1 JOIN t1 ) AS t WHERE f IS NOT NULL; @@ -11771,61 +11770,47 @@ { "query_block": { "select_id": 1, - "nested_loop": [ - { - "table": { - "table_name": "", - "access_type": "ALL", - "rows": 144, - "filtered": 100, - "attached_condition": "t.f is not null", - "materialized": { - "query_block": { - "select_id": 2, - "nested_loop": [ - { - "table": { - "table_name": "", - "access_type": "ALL", - "rows": 12, - "filtered": 100, - "materialized": { - "query_block": { - "select_id": 4, - "nested_loop": [ - { - "table": { - "table_name": "t1", - "access_type": "ALL", - "rows": 12, - "filtered": 100 - } - } - ] - } - } - } - }, - { - "block-nl-join": { + "temporary_table": { + "nested_loop": [ + { + "table": { + "table_name": "", + "access_type": "ALL", + "rows": 12, + "filtered": 100, + "materialized": { + "query_block": { + "select_id": 4, + "nested_loop": [ + { "table": { 
"table_name": "t1", "access_type": "ALL", "rows": 12, - "filtered": 100, - "attached_condition": "t1.f is not null" - }, - "buffer_type": "flat", - "buffer_size": "64", - "join_type": "BNL" + "filtered": 100 + } } - } - ] + ] + } } } + }, + { + "block-nl-join": { + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 12, + "filtered": 100, + "attached_condition": "t1.f is not null" + }, + "buffer_type": "flat", + "buffer_size": "64", + "join_type": "BNL" + } } - } - ] + ] + } } } SELECT * FROM t1; @@ -11854,62 +11839,48 @@ { "query_block": { "select_id": 1, - "nested_loop": [ - { - "table": { - "table_name": "", - "access_type": "ALL", - "rows": 16, - "filtered": 100, - "attached_condition": "t.f is not null", - "materialized": { - "query_block": { - "select_id": 2, - "nested_loop": [ - { - "table": { - "table_name": "t1", - "access_type": "ALL", - "rows": 8, - "filtered": 100, - "attached_condition": "t1.f is not null" - } - }, - { - "table": { - "table_name": "", - "access_type": "ref", - "possible_keys": ["key0"], - "key": "key0", - "key_length": "4", - "used_key_parts": ["f"], - "ref": ["test.t1.f"], - "rows": 2, - "filtered": 100, - "materialized": { - "query_block": { - "select_id": 4, - "nested_loop": [ - { - "table": { - "table_name": "t1", - "access_type": "ALL", - "rows": 8, - "filtered": 100, - "attached_condition": "t1.f is not null" - } - } - ] - } + "temporary_table": { + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 8, + "filtered": 100, + "attached_condition": "t1.f is not null" + } + }, + { + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "4", + "used_key_parts": ["f"], + "ref": ["test.t1.f"], + "rows": 2, + "filtered": 100, + "materialized": { + "query_block": { + "select_id": 4, + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 8, + "filtered": 100, + "attached_condition": "t1.f is not null" } } - } - ] + ] + } } } } - } - ] + ] + } } } SELECT * FROM t1; @@ -21669,6 +21640,27 @@ GROUP BY 1 ; ( SELECT 1 FROM ( SELECT 1 FROM cte1) dt GROUP BY x HAVING x= 1 ) 1 +create table t1 (f int); +create view v1 as select f, count(*) c from t1 group by f; +# +# MDEV-25012 Server crash in find_field_in_tables, Assertion `name' failed in find_field_in_table_ref +# +select * from v1 where export_set(1, default(f), 'x', aes_decrypt('secret', f)); +f c +show warnings; +Level Code Message +drop view v1; +drop table t1; +create table t(c3 longtext) ; +with cte1 as +( +select default(c3) as a +from t group by 1 +) +select * from cte1 +where cte1.a >= 1; +a +drop table t; # End of 10.5 tests # # MDEV-28958: condition pushable into view after simplification diff -Nru mariadb-10.11.11/mysql-test/main/derived_cond_pushdown.test mariadb-10.11.13/mysql-test/main/derived_cond_pushdown.test --- mariadb-10.11.11/mysql-test/main/derived_cond_pushdown.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/derived_cond_pushdown.test 2025-05-19 16:14:24.000000000 +0000 @@ -4271,6 +4271,28 @@ FROM cte2 GROUP BY 1 ; +create table t1 (f int); +create view v1 as select f, count(*) c from t1 group by f; + +--echo # +--echo # MDEV-25012 Server crash in find_field_in_tables, Assertion `name' failed in find_field_in_table_ref +--echo # +select * from v1 where export_set(1, default(f), 'x', aes_decrypt('secret', f)); +show warnings; +# cleanup +drop view v1; +drop table t1; + +create table t(c3 longtext) ; +with cte1 as +( 
+ select default(c3) as a + from t group by 1 +) +select * from cte1 +where cte1.a >= 1; +drop table t; + --echo # End of 10.5 tests --echo # diff -Nru mariadb-10.11.11/mysql-test/main/derived_view.result mariadb-10.11.13/mysql-test/main/derived_view.result --- mariadb-10.11.11/mysql-test/main/derived_view.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/derived_view.result 2025-05-19 16:14:24.000000000 +0000 @@ -2461,6 +2461,8 @@ a 1 1 +1 +1 drop table t1,t2; set optimizer_switch=@save968720_optimizer_switch; # diff -Nru mariadb-10.11.11/mysql-test/main/func_json.result mariadb-10.11.13/mysql-test/main/func_json.result --- mariadb-10.11.11/mysql-test/main/func_json.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/func_json.result 2025-05-19 16:14:24.000000000 +0000 @@ -1766,6 +1766,43 @@ data # +# MDEV-35614 JSON_UNQUOTE doesn't work with emojis +# +SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley; +hex_smiley +F09F988A +set names utf8mb4; +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley; +smiley +😊 +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys; +equal_smileys +1 +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys; +less_or_equal_smileys +1 +set @v='{ "color":"😊" }'; +select @v as v, collation(@v) as collation_v; +v collation_v +{ "color":"😊" } utf8mb4_general_ci +select json_valid(@v) as valid; +valid +1 +select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation; +color_extraction color_extraction_collation +"😊" utf8mb4_general_ci +select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation; +unquoted unquoted_collation +😊 utf8mb4_bin +SELECT JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4; +invalid_utf8mb4 +"\uc080\ude0a" +Warnings: +Warning 4035 Broken JSON string in argument 1 to function 'json_unquote' at position 13 +show warnings; +Level Code Message +Warning 4035 Broken JSON string in argument 1 to function 'json_unquote' at position 13 +# # End of 10.6 tests # # diff -Nru mariadb-10.11.11/mysql-test/main/func_json.test mariadb-10.11.13/mysql-test/main/func_json.test --- mariadb-10.11.11/mysql-test/main/func_json.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/func_json.test 2025-05-19 16:14:24.000000000 +0000 @@ -1194,6 +1194,7 @@ SET @@collation_connection= @save_collation_connection; + --echo # --echo # End of 10.5 tests --echo # @@ -1231,6 +1232,27 @@ data FROM JSON_TABLE (@data, '$[*]' COLUMNS (data text PATH '$.Data')) AS t; + +--echo # +--echo # MDEV-35614 JSON_UNQUOTE doesn't work with emojis +--echo # + +SELECT HEX(JSON_UNQUOTE('"\\ud83d\\ude0a"')) as hex_smiley; +set names utf8mb4; +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') as smiley; + +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') = JSON_UNQUOTE('"\\ud83d\\ude0a"') as equal_smileys; +SELECT JSON_UNQUOTE('"\\ud83d\\ude0a"') <= JSON_UNQUOTE('"\\ud83d\\ude0a"') as less_or_equal_smileys; + +set @v='{ "color":"😊" }'; +select @v as v, collation(@v) as collation_v; +select json_valid(@v) as valid; +select json_extract(@v,'$.color') as color_extraction, collation(json_extract(@v,'$.color')) as color_extraction_collation; +select json_unquote(json_extract(@v,'$.color')) as unquoted, collation(json_unquote(json_extract(@v,'$.color'))) as unquoted_collation; + +SELECT 
JSON_UNQUOTE('"\\uc080\\ude0a"') as invalid_utf8mb4; +show warnings; + --echo # --echo # End of 10.6 tests --echo # diff -Nru mariadb-10.11.11/mysql-test/main/func_like.result mariadb-10.11.13/mysql-test/main/func_like.result --- mariadb-10.11.11/mysql-test/main/func_like.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/func_like.result 2025-05-19 16:14:24.000000000 +0000 @@ -424,3 +424,22 @@ Note 1003 select 1 like `test`.`t1`.`c1` | `test`.`t1`.`c2` AS `1 LIKE c1|c2`,1 like `test`.`t1`.`c1` & `test`.`t1`.`c2` AS `1 LIKE c1&c2`,1 like `test`.`t1`.`c2` >> `test`.`t1`.`c1` AS `1 LIKE c2>>c1`,2 like `test`.`t1`.`c2` << `test`.`t1`.`c1` AS `2 LIKE c2< 0 AS `1 LIKE c1||c2`,2 like `test`.`t1`.`c1` + `test`.`t1`.`c2` AS `2 LIKE c1+c2`,-1 like `test`.`t1`.`c1` - `test`.`t1`.`c2` AS `-1 LIKE c1-c2`,2 like `test`.`t1`.`c1` * `test`.`t1`.`c2` AS `2 LIKE c1*c2`,0.5000 like `test`.`t1`.`c1` / `test`.`t1`.`c2` AS `0.5000 LIKE c1/c2`,0 like `test`.`t1`.`c1` DIV `test`.`t1`.`c2` AS `0 LIKE c1 DIV c2`,0 like `test`.`t1`.`c1` MOD `test`.`t1`.`c2` AS `0 LIKE c1 MOD c2` from `test`.`t1` order by `test`.`t1`.`c2` DROP VIEW v1; DROP TABLE t1; +# +# MDEV-36211 Incorrect query result for binary_column NOT LIKE binary_column +# +CREATE TABLE t1 (c1 BLOB NOT NULL); +INSERT INTO t1 (c1) VALUES (1); +SELECT c1 FROM t1 WHERE c1 NOT LIKE c1; +c1 +SELECT c1 FROM t1 WHERE c1 LIKE c1; +c1 +1 +DROP TABLE t1; +CREATE TABLE t1 (c1 BLOB); +INSERT INTO t1 (c1) VALUES (1); +SELECT c1 FROM t1 WHERE c1 NOT LIKE c1; +c1 +SELECT c1 FROM t1 WHERE c1 LIKE c1; +c1 +1 +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/func_like.test mariadb-10.11.13/mysql-test/main/func_like.test --- mariadb-10.11.11/mysql-test/main/func_like.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/func_like.test 2025-05-19 16:14:24.000000000 +0000 @@ -291,3 +291,18 @@ EXPLAIN EXTENDED SELECT * FROM v1; DROP VIEW v1; DROP TABLE t1; + +--echo # +--echo # MDEV-36211 Incorrect query result for binary_column NOT LIKE binary_column +--echo # +CREATE TABLE t1 (c1 BLOB NOT NULL); +INSERT INTO t1 (c1) VALUES (1); +SELECT c1 FROM t1 WHERE c1 NOT LIKE c1; +SELECT c1 FROM t1 WHERE c1 LIKE c1; +DROP TABLE t1; + +CREATE TABLE t1 (c1 BLOB); +INSERT INTO t1 (c1) VALUES (1); +SELECT c1 FROM t1 WHERE c1 NOT LIKE c1; +SELECT c1 FROM t1 WHERE c1 LIKE c1; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/func_regexp_pcre.result mariadb-10.11.13/mysql-test/main/func_regexp_pcre.result --- mariadb-10.11.11/mysql-test/main/func_regexp_pcre.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/func_regexp_pcre.result 2025-05-19 16:14:24.000000000 +0000 @@ -60,7 +60,7 @@ INSERT INTO t2 VALUES ('\\p{Cyrillic}'),('\\p{Greek}'),('\\p{Latin}'); INSERT INTO t2 VALUES ('\\p{Han}'),('\\p{Hangul}'); INSERT INTO t2 VALUES ('\\p{Sinhala}'), ('\\p{Tamil}'); -INSERT INTO t2 VALUES ('\\p{L}'),('\\p{Ll}'),('\\p{Lu}'),('\\p{L&}'); +INSERT INTO t2 VALUES ('\\p{L}'), /* buggy before v10.45 ('\\p{Ll}'),('\\p{Lu}'),*/ ('\\p{L&}'); INSERT INTO t2 VALUES ('[[:alpha:]]'),('[[:digit:]]'); SELECT class, ch, ch RLIKE class FROM t1, t2 ORDER BY class, BINARY ch; class ch ch RLIKE class @@ -168,32 +168,6 @@ \p{Latin} à¶´ 0 \p{Latin} ã— 0 \p{Latin} ê°· 0 -\p{Ll} 1 0 -\p{Ll} A 0 -\p{Ll} a 1 -\p{Ll} À 0 -\p{Ll} à 1 -\p{Ll} Σ 0 -\p{Ll} σ 1 -\p{Ll} Я 0 -\p{Ll} Ñ 1 -\p{Ll} ௨ 0 -\p{Ll} à¶´ 0 -\p{Ll} ã— 0 -\p{Ll} ê°· 0 -\p{Lu} 1 0 -\p{Lu} A 1 -\p{Lu} a 0 -\p{Lu} À 1 -\p{Lu} à 0 -\p{Lu} Σ 1 -\p{Lu} σ 0 -\p{Lu} Я 1 
-\p{Lu} я 0 -\p{Lu} ௨ 0 -\p{Lu} ප 0 -\p{Lu} 㗁 0 -\p{Lu} 갷 0 \p{L} 1 0 \p{L} A 1 \p{L} a 1 diff -Nru mariadb-10.11.11/mysql-test/main/func_regexp_pcre.test mariadb-10.11.13/mysql-test/main/func_regexp_pcre.test --- mariadb-10.11.11/mysql-test/main/func_regexp_pcre.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/func_regexp_pcre.test 2025-05-19 16:14:24.000000000 +0000 @@ -41,7 +41,7 @@ INSERT INTO t2 VALUES ('\\p{Cyrillic}'),('\\p{Greek}'),('\\p{Latin}'); INSERT INTO t2 VALUES ('\\p{Han}'),('\\p{Hangul}'); INSERT INTO t2 VALUES ('\\p{Sinhala}'), ('\\p{Tamil}'); -INSERT INTO t2 VALUES ('\\p{L}'),('\\p{Ll}'),('\\p{Lu}'),('\\p{L&}'); +INSERT INTO t2 VALUES ('\\p{L}'), /* buggy before v10.45 ('\\p{Ll}'),('\\p{Lu}'),*/ ('\\p{L&}'); INSERT INTO t2 VALUES ('[[:alpha:]]'),('[[:digit:]]'); SELECT class, ch, ch RLIKE class FROM t1, t2 ORDER BY class, BINARY ch; DROP TABLE t1, t2; diff -Nru mariadb-10.11.11/mysql-test/main/gis-precise.result mariadb-10.11.13/mysql-test/main/gis-precise.result --- mariadb-10.11.11/mysql-test/main/gis-precise.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/gis-precise.result 2025-05-19 16:14:24.000000000 +0000 @@ -776,7 +776,7 @@ ST_DISTANCE_SPHERE(1, 1, NULL) NULL SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(1 0)'), ST_GEOMFROMTEXT('LINESTRING(0 0, 1 1)')) as result; -ERROR HY000: Internal error: st_distance_sphere +ERROR HY000: Calling geometry function st_distance_sphere with unsupported types of arguments. # Test Points and radius SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(0 0)'), ST_GEOMFROMTEXT('POINT(1 1)')) as result; result @@ -788,9 +788,9 @@ result 0.024682056391766436 SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(0 0)'), ST_GEOMFROMTEXT('POINT(1 1)'), 0) as result; -ERROR HY000: Internal error: Radius must be greater than zero. +ERROR HY000: Calling geometry function st_distance_sphere with unsupported types of arguments. SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(0 0)'), ST_GEOMFROMTEXT('POINT(1 1)'), -1) as result; -ERROR HY000: Internal error: Radius must be greater than zero. +ERROR HY000: Calling geometry function st_distance_sphere with unsupported types of arguments. # Test longitude/latitude SELECT TRUNCATE(ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(0 1)'), ST_GEOMFROMTEXT('POINT(1 2)')), 10) as result; result @@ -843,7 +843,7 @@ result 0.04933028646581131 SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('MULTIPOINT(1 2,1 1 )'), ST_GEOMFROMTEXT('MULTIPOINT(8 9,3 4 )'),0) as result; -ERROR HY000: Internal error: Radius must be greater than zero. +ERROR HY000: Calling geometry function st_distance_sphere with unsupported types of arguments.
set @pt1 = ST_GeomFromText('POINT(190 -30)'); set @pt2 = ST_GeomFromText('POINT(-30 50)'); SELECT ST_Distance_Sphere(@pt1, @pt2); diff -Nru mariadb-10.11.11/mysql-test/main/gis-precise.test mariadb-10.11.13/mysql-test/main/gis-precise.test --- mariadb-10.11.11/mysql-test/main/gis-precise.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/gis-precise.test 2025-05-19 16:14:24.000000000 +0000 @@ -422,7 +422,7 @@ # Return NULL if radius is NULL SELECT ST_DISTANCE_SPHERE(1, 1, NULL); # Wrong geometry ---error ER_INTERNAL_ERROR +--error ER_GIS_UNSUPPORTED_ARGUMENT SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(1 0)'), ST_GEOMFROMTEXT('LINESTRING(0 0, 1 1)')) as result; --echo # Test Points and radius @@ -430,9 +430,9 @@ # make bb x86 happy SELECT TRUNCATE(ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(-1 -1)'), ST_GEOMFROMTEXT('POINT(-2 -2)')), 10) as result; SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(0 0)'), ST_GEOMFROMTEXT('POINT(1 1)'), 1) as result; ---error ER_INTERNAL_ERROR +--error ER_GIS_UNSUPPORTED_ARGUMENT SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(0 0)'), ST_GEOMFROMTEXT('POINT(1 1)'), 0) as result; ---error ER_INTERNAL_ERROR +--error ER_GIS_UNSUPPORTED_ARGUMENT SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('POINT(0 0)'), ST_GEOMFROMTEXT('POINT(1 1)'), -1) as result; --echo # Test longitude/latitude # make bb x86 happy @@ -456,7 +456,7 @@ SELECT TRUNCATE(ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('MULTIPOINT(1 2,1 1 )'), ST_GEOMFROMTEXT('MULTIPOINT(8 9,3 4 )')), 10) as result; # make bb x86 happy SELECT TRUNCATE(ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('MULTIPOINT(1 2,1 1 )'), ST_GEOMFROMTEXT('MULTIPOINT(8 9,3 4 )'),1), 17) as result; ---error ER_INTERNAL_ERROR +--error ER_GIS_UNSUPPORTED_ARGUMENT SELECT ST_DISTANCE_SPHERE(ST_GEOMFROMTEXT('MULTIPOINT(1 2,1 1 )'), ST_GEOMFROMTEXT('MULTIPOINT(8 9,3 4 )'),0) as result; # Longitude out of range [-180,180] diff -Nru mariadb-10.11.11/mysql-test/main/gis.result mariadb-10.11.13/mysql-test/main/gis.result --- mariadb-10.11.11/mysql-test/main/gis.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/gis.result 2025-05-19 16:14:24.000000000 +0000 @@ -5474,4 +5474,36 @@ SELECT NTH_VALUE(a,b) OVER () FROM t; ERROR HY000: Illegal parameter data types point and bigint for operation '-' DROP TABLE t; +# +# MDEV-32619 Setting SRID on geometry with ST_*FromWKB(g, srid) +# +SELECT +ST_SRID(g1), +ST_SRID(ST_GeomFromWKB(g1, 4326)), +ST_SRID(ST_GeomFromWKB(g1)), +ST_AsText(g1), +ST_SRID(ST_PointFromWKB(g2, 4326)), +ST_SRID(g2), +ST_SRID(ST_LineStringFromWKB(g3, 3)), +ST_SRID(ST_PolygonFromWKB(g4, 4)), +ST_SRID(ST_MultiPointFromWKB(g5, 5)), +ST_SRID(ST_MultiLineStringFromWKB(g6, 6)), +ST_SRID(ST_MultiPolygonFromWKB(g7, 7)) +FROM ( +SELECT +POINT(1, 2) AS g1, +POINT(4, 3) AS g2, +LINESTRING(POINT(4, 3), POINT(4, 4)) AS g3, +POLYGON(LINESTRING(POINT(4, 3), POINT(4, 4), POINT(3, 4), POINT(4, 3))) AS g4, +MULTIPOINT(POINT(4, 3)) AS g5, +MULTILINESTRING(LINESTRING(POINT(4, 3), POINT(4, 4))) AS g6, +MULTIPOLYGON(POLYGON(LINESTRING(POINT(4, 3), POINT(4, 4), POINT(3, 4), POINT(4, 3)))) AS g7 +) AS t; +ST_SRID(g1) ST_SRID(ST_GeomFromWKB(g1, 4326)) ST_SRID(ST_GeomFromWKB(g1)) ST_AsText(g1) ST_SRID(ST_PointFromWKB(g2, 4326)) ST_SRID(g2) ST_SRID(ST_LineStringFromWKB(g3, 3)) ST_SRID(ST_PolygonFromWKB(g4, 4)) ST_SRID(ST_MultiPointFromWKB(g5, 5)) ST_SRID(ST_MultiLineStringFromWKB(g6, 6)) ST_SRID(ST_MultiPolygonFromWKB(g7, 7)) +0 4326 0 POINT(1 2) 4326 0 3 4 5 6 7 +# +# MDEV-35117 Error message "ERROR 1815 (HY000): Internal
error: st_distance_sphere' could be improved +# +SELECT ST_DISTANCE_SPHERE(st_geomfromtext('linestring( 2 2, 2 8) '), ST_GeomFromText('POINT(18.413076 43.856258)')) ; +ERROR HY000: Calling geometry function st_distance_sphere with unsupported types of arguments. # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/gis.test mariadb-10.11.13/mysql-test/main/gis.test --- mariadb-10.11.11/mysql-test/main/gis.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/gis.test 2025-05-19 16:14:24.000000000 +0000 @@ -3482,4 +3482,36 @@ SELECT NTH_VALUE(a,b) OVER () FROM t; DROP TABLE t; +--echo # +--echo # MDEV-32619 Setting SRID on geometry with ST_*FromWKB(g, srid) +--echo # +SELECT + ST_SRID(g1), + ST_SRID(ST_GeomFromWKB(g1, 4326)), + ST_SRID(ST_GeomFromWKB(g1)), + ST_AsText(g1), + ST_SRID(ST_PointFromWKB(g2, 4326)), + ST_SRID(g2), + ST_SRID(ST_LineStringFromWKB(g3, 3)), + ST_SRID(ST_PolygonFromWKB(g4, 4)), + ST_SRID(ST_MultiPointFromWKB(g5, 5)), + ST_SRID(ST_MultiLineStringFromWKB(g6, 6)), + ST_SRID(ST_MultiPolygonFromWKB(g7, 7)) +FROM ( + SELECT + POINT(1, 2) AS g1, + POINT(4, 3) AS g2, + LINESTRING(POINT(4, 3), POINT(4, 4)) AS g3, + POLYGON(LINESTRING(POINT(4, 3), POINT(4, 4), POINT(3, 4), POINT(4, 3))) AS g4, + MULTIPOINT(POINT(4, 3)) AS g5, + MULTILINESTRING(LINESTRING(POINT(4, 3), POINT(4, 4))) AS g6, + MULTIPOLYGON(POLYGON(LINESTRING(POINT(4, 3), POINT(4, 4), POINT(3, 4), POINT(4, 3)))) AS g7 +) AS t; + +--echo # +--echo # MDEV-35117 Error message "ERROR 1815 (HY000): Internal error: st_distance_sphere' could be improved +--echo # +--error ER_GIS_UNSUPPORTED_ARGUMENT +SELECT ST_DISTANCE_SPHERE(st_geomfromtext('linestring( 2 2, 2 8) '), ST_GeomFromText('POINT(18.413076 43.856258)')) ; + --echo # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/group_by.result mariadb-10.11.13/mysql-test/main/group_by.result --- mariadb-10.11.11/mysql-test/main/group_by.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/group_by.result 2025-05-19 16:14:24.000000000 +0000 @@ -2997,5 +2997,79 @@ ERROR 42S22: Reference 'c' not supported (forward reference in item list) DROP TABLE t1; # +# MDEV-35238: Wrong results from a table with a single record and an aggregate +# +CREATE OR REPLACE TABLE t1 (a int) ENGINE=myisam; +SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +1+0 min(1) +1 NULL +explain format=json SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "message": "Impossible WHERE noticed after reading const tables" + } + } +} +INSERT INTO t1 VALUES (NULL); +SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +1+0 min(1) +1 NULL +explain format=json SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +EXPLAIN +{ + "query_block": { + "select_id": 1, + "pseudo_bits_condition": "if(uuid_short(),NULL,1)", + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "system", + "rows": 1, + "filtered": 100 + } + } + ] + } +} +DROP TABLE t1; +CREATE TABLE t1 (a int PRIMARY KEY) ENGINE=myisam; +INSERT INTO t1 VALUES (1); +CREATE TABLE t2 (a int NOT NULL) ENGINE=myisam; +INSERT INTO t2 VALUES (10); +SELECT 1+0, MIN(t1.a) FROM t1,t2 WHERE t2.a = rand(); +1+0 MIN(t1.a) +1 1 +explain format=json SELECT 1+0, MIN(t1.a) FROM t1,t2 WHERE t2.a = rand(); +EXPLAIN +{ + "query_block": { + "select_id": 1, + "pseudo_bits_condition": "10 = rand()", + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "system", + "rows": 1, + "filtered":
100 + } + }, + { + "table": { + "table_name": "t2", + "access_type": "system", + "rows": 1, + "filtered": 100 + } + } + ] + } +} +DROP TABLE t1,t2; +# # End of 10.5 tests # diff -Nru mariadb-10.11.11/mysql-test/main/group_by.test mariadb-10.11.13/mysql-test/main/group_by.test --- mariadb-10.11.11/mysql-test/main/group_by.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/group_by.test 2025-05-19 16:14:24.000000000 +0000 @@ -2153,5 +2153,27 @@ DROP TABLE t1; --echo # +--echo # MDEV-35238: Wrong results from a table with a single record and an aggregate +--echo # +CREATE OR REPLACE TABLE t1 (a int) ENGINE=myisam; +SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +explain format=json SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +INSERT INTO t1 VALUES (NULL); +SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +explain format=json SELECT 1+0, min(1) FROM t1 WHERE if(uuid_short(), a,1); +DROP TABLE t1; + +CREATE TABLE t1 (a int PRIMARY KEY) ENGINE=myisam; +INSERT INTO t1 VALUES (1); + +CREATE TABLE t2 (a int NOT NULL) ENGINE=myisam; +INSERT INTO t2 VALUES (10); + +SELECT 1+0, MIN(t1.a) FROM t1,t2 WHERE t2.a = rand(); +explain format=json SELECT 1+0, MIN(t1.a) FROM t1,t2 WHERE t2.a = rand(); + +DROP TABLE t1,t2; + +--echo # --echo # End of 10.5 tests --echo # diff -Nru mariadb-10.11.11/mysql-test/main/group_min_max.result mariadb-10.11.13/mysql-test/main/group_min_max.result --- mariadb-10.11.11/mysql-test/main/group_min_max.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/group_min_max.result 2025-05-19 16:14:24.000000000 +0000 @@ -4349,3 +4349,27 @@ # # End of 10.6 tests # +# +# MDEV-36118 Wrong result in loose index scan +# +CREATE TABLE t1 (a int, b int, KEY (a, b)); +insert into t1 values (1, 3), (1, 1); +SELECT MAX(b) FROM t1 WHERE (b > 2 AND b < 4) OR (b = 5) GROUP BY a; +MAX(b) +3 +drop table t1; +# +# MDEV-36220 ASAN unknown-crash in loose index scan of MIN with IS NULL +# +CREATE TABLE t1 (a int, b int, KEY (a, b)); +insert into t1 values (4, NULL), (1, 14), (4, 3); +SELECT MIN(b) FROM t1 WHERE b = 3 OR b IS NULL GROUP BY a; +MIN(b) +3 +SELECT MIN(b) FROM t1 WHERE b IS NULL GROUP BY a; +MIN(b) +NULL +drop table t1; +# +# End of 10.11 tests +# diff -Nru mariadb-10.11.11/mysql-test/main/group_min_max.test mariadb-10.11.13/mysql-test/main/group_min_max.test --- mariadb-10.11.11/mysql-test/main/group_min_max.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/group_min_max.test 2025-05-19 16:14:24.000000000 +0000 @@ -2007,3 +2007,39 @@ --echo # --echo # End of 10.6 tests --echo # + +--echo # +--echo # MDEV-36118 Wrong result in loose index scan +--echo # + +CREATE TABLE t1 (a int, b int, KEY (a, b)); +insert into t1 values (1, 3), (1, 1); +--source include/maybe_debug.inc +if ($have_debug) { + --disable_query_log + set @old_debug=@@debug; + set debug="+d,force_group_by"; + --enable_query_log +} +SELECT MAX(b) FROM t1 WHERE (b > 2 AND b < 4) OR (b = 5) GROUP BY a; +if ($have_debug) { + --disable_query_log + set debug=@old_debug; + --enable_query_log +} + +drop table t1; + +--echo # +--echo # MDEV-36220 ASAN unknown-crash in loose index scan of MIN with IS NULL +--echo # + +CREATE TABLE t1 (a int, b int, KEY (a, b)); +insert into t1 values (4, NULL), (1, 14), (4, 3); +SELECT MIN(b) FROM t1 WHERE b = 3 OR b IS NULL GROUP BY a; +SELECT MIN(b) FROM t1 WHERE b IS NULL GROUP BY a; +drop table t1; + +--echo # +--echo # End of 10.11 tests +--echo # diff -Nru
mariadb-10.11.11/mysql-test/main/insert.result mariadb-10.11.13/mysql-test/main/insert.result --- mariadb-10.11.11/mysql-test/main/insert.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/insert.result 2025-05-19 16:14:24.000000000 +0000 @@ -806,5 +806,75 @@ 8 drop table t1; # -# End of 10.5 tests +# MDEV-32086 Server crash when inserting from derived table containing insert target table +# (part 2) +# +create table t1 (pk int, id int); +insert into t1 values (2,2), (3,3), (4,4); +select * from t1; +pk id +2 2 +3 3 +4 4 +select 101+count(*) +from +( +select dt2.id +from (select id from t1) dt2, t1 t where t.id=dt2.id +) dt +where dt.id<1000; +101+count(*) +104 +prepare s from ' +insert into t1 values( + (select 101+count(*) + from + ( + select dt2.id + from (select id from t1) dt2, t1 t where t.id=dt2.id + ) dt + where dt.id<1000 + ), 123 +) +'; +execute s; +select * from t1; +pk id +2 2 +3 3 +4 4 +104 123 +select 101+count(*) +from +( +select dt2.id +from (select id from t1) dt2, t1 t where t.id=dt2.id +) dt +where dt.id<1000; +101+count(*) +105 +execute s; +select * from t1; +pk id +2 2 +3 3 +4 4 +104 123 +105 123 +drop table t1; # +# Try this: INSERT INTO t1 VALUES ... reference to t1 +# RETURNING (subquery not touching t1) +create table t1 (a int, b int); +create table t2 (a int, b int); +# This is accepted: +insert into t1 (a) values +(3), +((select max(a) from t1)) +returning +a, b, (select max(a) from t2); +a b (select max(a) from t2) +3 NULL NULL +NULL NULL NULL +drop table t1,t2; +# End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/insert.test mariadb-10.11.13/mysql-test/main/insert.test --- mariadb-10.11.11/mysql-test/main/insert.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/insert.test 2025-05-19 16:14:24.000000000 +0000 @@ -675,5 +675,59 @@ drop table t1; --echo # ---echo # End of 10.5 tests +--echo # MDEV-32086 Server crash when inserting from derived table containing insert target table +--echo # (part 2) +--echo # + +create table t1 (pk int, id int); +insert into t1 values (2,2), (3,3), (4,4); +select * from t1; +select 101+count(*) + from + ( + select dt2.id + from (select id from t1) dt2, t1 t where t.id=dt2.id + ) dt + where dt.id<1000; +prepare s from ' +insert into t1 values( + (select 101+count(*) + from + ( + select dt2.id + from (select id from t1) dt2, t1 t where t.id=dt2.id + ) dt + where dt.id<1000 + ), 123 +) +'; +execute s; +select * from t1; +select 101+count(*) + from + ( + select dt2.id + from (select id from t1) dt2, t1 t where t.id=dt2.id + ) dt + where dt.id<1000; +execute s; +select * from t1; + +drop table t1; + --echo # +--echo # Try this: INSERT INTO t1 VALUES ... 
reference to t1 +--echo # RETURNING (subquery not touching t1) +create table t1 (a int, b int); +create table t2 (a int, b int); + +--echo # This is accepted: +insert into t1 (a) values + (3), + ((select max(a) from t1)) +returning + a, b, (select max(a) from t2); + +drop table t1,t2; + +--echo # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/insert_returning.result mariadb-10.11.13/mysql-test/main/insert_returning.result --- mariadb-10.11.11/mysql-test/main/insert_returning.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/insert_returning.result 2025-05-19 16:14:24.000000000 +0000 @@ -498,6 +498,8 @@ 5 6 INSERT INTO t2(id2,val2) VALUES(5,'f') RETURNING (SELECT id2 FROM t2); ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +INSERT INTO t2(id2,val2) VALUES(5,'f') RETURNING (SELECT 1 UNION SELECT id2 FROM t2); +ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data INSERT INTO t2 (id2, val2) VALUES (6,'f') RETURNING t1.*; ERROR 42S02: Unknown table 'test.t1' # diff -Nru mariadb-10.11.11/mysql-test/main/insert_returning.test mariadb-10.11.13/mysql-test/main/insert_returning.test --- mariadb-10.11.11/mysql-test/main/insert_returning.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/insert_returning.test 2025-05-19 16:14:24.000000000 +0000 @@ -199,6 +199,8 @@ t1 WHERE id1=1); --error ER_UPDATE_TABLE_USED INSERT INTO t2(id2,val2) VALUES(5,'f') RETURNING (SELECT id2 FROM t2); +--error ER_UPDATE_TABLE_USED +INSERT INTO t2(id2,val2) VALUES(5,'f') RETURNING (SELECT 1 UNION SELECT id2 FROM t2); --error ER_BAD_TABLE_ERROR INSERT INTO t2 (id2, val2) VALUES (6,'f') RETURNING t1.*; diff -Nru mariadb-10.11.11/mysql-test/main/insert_select.result mariadb-10.11.13/mysql-test/main/insert_select.result --- mariadb-10.11.11/mysql-test/main/insert_select.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/insert_select.result 2025-05-19 16:14:24.000000000 +0000 @@ -1030,6 +1030,139 @@ 3 DROP VIEW v1; DROP TABLE t1; +create table t1 (pk int, id int); +insert into t1 values (2,2), (3,3), (4,4); +insert into t1 +select 1,10 +from +( +select dt2.id from (select id from t1) dt2, t1 t where t.id=dt2.id +) dt +where dt.id=3; +select * from t1; +pk id +2 2 +3 3 +4 4 +1 10 +explain insert into t1 +select 1,10 +from +( +select dt2.id from (select id from t1) dt2, t1 t where t.id=dt2.id +) dt +where dt.id=3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using where; Using temporary +1 SIMPLE t ALL NULL NULL NULL NULL 4 Using where; Using join buffer (flat, BNL join) +explain format=json insert into t1 +select 1,10 +from +( +select dt2.id from (select id from t1) dt2, t1 t where t.id=dt2.id +) dt +where dt.id=3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "temporary_table": { + "nested_loop": [ + { + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 4, + "filtered": 100, + "attached_condition": "t1.`id` = 3" + } + }, + { + "block-nl-join": { + "table": { + "table_name": "t", + "access_type": "ALL", + "rows": 4, + "filtered": 100, + "attached_condition": "t.`id` = 3" + }, + "buffer_type": "flat", + "buffer_size": "65", + "join_type": "BNL" + } + } + ] + } + } +} +prepare stmt from "insert into t1 +select 1,10 +from +( +select dt2.id from (select id from t1) dt2, t1 t where t.id=dt2.id +) dt +where dt.id=3"; +execute stmt; +select * 
from t1; +pk id +2 2 +3 3 +4 4 +1 10 +1 10 +execute stmt; +select * from t1; +pk id +2 2 +3 3 +4 4 +1 10 +1 10 +1 10 +deallocate prepare stmt; +create procedure p() insert into t1 +select 1,10 +from +( +select dt2.id from (select id from t1) dt2, t1 t where t.id=dt2.id +) dt +where dt.id=3; +call p(); +select * from t1; +pk id +2 2 +3 3 +4 4 +1 10 +1 10 +1 10 +1 10 +call p(); +select * from t1; +pk id +2 2 +3 3 +4 4 +1 10 +1 10 +1 10 +1 10 +1 10 +drop procedure p; +drop table t1; # -# End of 10.5 test +# MDEV-33139: Crash of INSERT SELECT when preparing structures for +# split optimization # +CREATE TABLE v0 ( v1 INT UNIQUE ) ; +INSERT INTO v0 ( v1 ) VALUES +( ( SELECT 1 +FROM +( SELECT v1 +FROM v0 GROUP BY v1 ) AS v6 NATURAL JOIN +v0 AS v2 NATURAL JOIN +v0 AS v4 NATURAL JOIN +v0 AS v3 NATURAL JOIN +( SELECT v1 FROM v0 ) AS v7 ) ) ; +DROP TABLE v0; +# End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/insert_select.test mariadb-10.11.13/mysql-test/main/insert_select.test --- mariadb-10.11.11/mysql-test/main/insert_select.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/insert_select.test 2025-05-19 16:14:24.000000000 +0000 @@ -591,6 +591,60 @@ DROP VIEW v1; DROP TABLE t1; +# +# MDEV-32086: condition pushdown into two mergeable derived tables, +# one containing the other, when they are forced to be +# materialized in INSERT +# +create table t1 (pk int, id int); +insert into t1 values (2,2), (3,3), (4,4); + +let $q= +insert into t1 + select 1,10 + from + ( + select dt2.id from (select id from t1) dt2, t1 t where t.id=dt2.id + ) dt + where dt.id=3; + +eval $q; +select * from t1; + +eval explain $q; +eval explain format=json $q; + +eval prepare stmt from "$q"; +execute stmt; +select * from t1; +execute stmt; +select * from t1; +deallocate prepare stmt; + +eval create procedure p() $q; +call p(); +select * from t1; +call p(); +select * from t1; +drop procedure p; + +drop table t1; + --echo # ---echo # End of 10.5 test +--echo # MDEV-33139: Crash of INSERT SELECT when preparing structures for +--echo # split optimization --echo # + +CREATE TABLE v0 ( v1 INT UNIQUE ) ; +INSERT INTO v0 ( v1 ) VALUES + ( ( SELECT 1 + FROM + ( SELECT v1 + FROM v0 GROUP BY v1 ) AS v6 NATURAL JOIN + v0 AS v2 NATURAL JOIN + v0 AS v4 NATURAL JOIN + v0 AS v3 NATURAL JOIN + ( SELECT v1 FROM v0 ) AS v7 ) ) ; +DROP TABLE v0; + +--echo # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/join.result mariadb-10.11.13/mysql-test/main/join.result --- mariadb-10.11.11/mysql-test/main/join.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/join.result 2025-05-19 16:14:24.000000000 +0000 @@ -3611,3 +3611,32 @@ 1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where 1 SIMPLE t2 ref kp1 kp1 5 test.t1.a 1 Using index condition drop table t1,t2; +# +# MDEV-36592: If the join_condition is specified via USING (column_list), the query plan depends ... 
+# +CREATE TABLE t1 ( +id int(11), +f1 char(255), +PRIMARY KEY (id) +); +INSERT INTO t1 (id) VALUES (1),(2),(3); +UPDATE t1 SET f1=REPEAT('a',250); +CREATE TABLE t2 (id int(11), f2 INT NOT NULL); +INSERT INTO t2 select seq, seq from seq_1_to_20; +ANALYZE TABLE t1, t2; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +# In both queries, t1 should use type=index, not type=ALL: +EXPLAIN SELECT count(*) FROM t2 JOIN t1 USING (id); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index PRIMARY PRIMARY 4 NULL 3 Using index +1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using where; Using join buffer (flat, BNL join) +EXPLAIN SELECT count(*) FROM t1 JOIN t2 USING (id); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index PRIMARY PRIMARY 4 NULL 3 Using index +1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using where; Using join buffer (flat, BNL join) +DROP TABLE t1,t2; +# End of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/main/join.test mariadb-10.11.13/mysql-test/main/join.test --- mariadb-10.11.11/mysql-test/main/join.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/join.test 2025-05-19 16:14:24.000000000 +0000 @@ -2015,3 +2015,28 @@ t2.kp1=t1.a and t2.kp1<=100 and t2.kp2<=20; drop table t1,t2; + +--echo # +--echo # MDEV-36592: If the join_condition is specified via USING (column_list), the query plan depends ... +--echo # +CREATE TABLE t1 ( + id int(11), + f1 char(255), + PRIMARY KEY (id) +); +INSERT INTO t1 (id) VALUES (1),(2),(3); +UPDATE t1 SET f1=REPEAT('a',250); + +CREATE TABLE t2 (id int(11), f2 INT NOT NULL); +INSERT INTO t2 select seq, seq from seq_1_to_20; + +ANALYZE TABLE t1, t2; + +--echo # In both queries, t1 should use type=index, not type=ALL: +EXPLAIN SELECT count(*) FROM t2 JOIN t1 USING (id); +EXPLAIN SELECT count(*) FROM t1 JOIN t2 USING (id); + +DROP TABLE t1,t2; + +--echo # End of 10.11 tests + diff -Nru mariadb-10.11.11/mysql-test/main/join_cache.result mariadb-10.11.13/mysql-test/main/join_cache.result --- mariadb-10.11.11/mysql-test/main/join_cache.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/join_cache.result 2025-05-19 16:14:24.000000000 +0000 @@ -6443,3 +6443,29 @@ # # End of 10.5 tests # +# +# MDEV-36165: BKA join cache buffer is employed despite join_cache_level=3 (flat BNLH) +# +CREATE TABLE t1(a INT); +INSERT INTO t1 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT a, a from t1; +CREATE TABLE t3(a INT, b INT, c INT, key (a,b)); +INSERT INTO t3 select a, a, a FROM t1; +SET optimizer_switch = 'join_cache_hashed=off,join_cache_bka=on,mrr=on'; +SET join_cache_level = 3; +EXPLAIN SELECT * FROM t2, t3 WHERE t2.a=t3.a AND (t3.b+1 <= t2.b+1); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where +1 SIMPLE t3 ref a a 5 test.t2.a 1 Using index condition +SET join_cache_level = 4; +EXPLAIN SELECT * FROM t2, t3 WHERE t2.a=t3.a AND (t3.b+1 <= t2.b+1); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 10 Using where +1 SIMPLE t3 ref a a 5 test.t2.a 1 Using index condition +SET join_cache_level = default; +SET optimizer_switch = default; +DROP TABLE t1, t2, t3; +# +# End of 10.11 tests +# diff -Nru 
mariadb-10.11.11/mysql-test/main/join_cache.test mariadb-10.11.13/mysql-test/main/join_cache.test --- mariadb-10.11.11/mysql-test/main/join_cache.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/join_cache.test 2025-05-19 16:14:24.000000000 +0000 @@ -4321,3 +4321,30 @@ --echo # --echo # End of 10.5 tests --echo # + +--echo # +--echo # MDEV-36165: BKA join cache buffer is employed despite join_cache_level=3 (flat BNLH) +--echo # +--source include/have_sequence.inc +CREATE TABLE t1(a INT); +INSERT INTO t1 VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +CREATE TABLE t2(a INT, b INT); +INSERT INTO t2 SELECT a, a from t1; +CREATE TABLE t3(a INT, b INT, c INT, key (a,b)); +INSERT INTO t3 select a, a, a FROM t1; + +SET optimizer_switch = 'join_cache_hashed=off,join_cache_bka=on,mrr=on'; + +SET join_cache_level = 3; +EXPLAIN SELECT * FROM t2, t3 WHERE t2.a=t3.a AND (t3.b+1 <= t2.b+1); + +SET join_cache_level = 4; +EXPLAIN SELECT * FROM t2, t3 WHERE t2.a=t3.a AND (t3.b+1 <= t2.b+1); + +SET join_cache_level = default; +SET optimizer_switch = default; +DROP TABLE t1, t2, t3; + +--echo # +--echo # End of 10.11 tests +--echo # diff -Nru mariadb-10.11.11/mysql-test/main/join_nested.result mariadb-10.11.13/mysql-test/main/join_nested.result --- mariadb-10.11.11/mysql-test/main/join_nested.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/join_nested.result 2025-05-19 16:14:24.000000000 +0000 @@ -2051,3 +2051,15 @@ DROP TABLE t1, t2, t3; set join_cache_level= @save_join_cache_level; # end of 10.3 tests +# +# MDEV-32084: Assertion in best_extension_by_limited_search(), or crash elsewhere in release +# +CREATE TABLE t1 (i int); +INSERT INTO t1 values (1),(2); +SELECT 1 FROM t1 WHERE i IN +(SELECT 1 FROM t1 c +LEFT JOIN (t1 a LEFT JOIN t1 b ON t1.i = b.i) ON c.i = t1.i); +1 +1 +DROP TABLE t1; +# end of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/main/join_nested.test mariadb-10.11.13/mysql-test/main/join_nested.test --- mariadb-10.11.11/mysql-test/main/join_nested.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/join_nested.test 2025-05-19 16:14:24.000000000 +0000 @@ -1458,3 +1458,16 @@ set join_cache_level= @save_join_cache_level; --echo # end of 10.3 tests + +--echo # +--echo # MDEV-32084: Assertion in best_extension_by_limited_search(), or crash elsewhere in release +--echo # +CREATE TABLE t1 (i int); +INSERT INTO t1 values (1),(2); + +SELECT 1 FROM t1 WHERE i IN + (SELECT 1 FROM t1 c + LEFT JOIN (t1 a LEFT JOIN t1 b ON t1.i = b.i) ON c.i = t1.i); + +DROP TABLE t1; +--echo # end of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/main/join_nested_jcl6.result mariadb-10.11.13/mysql-test/main/join_nested_jcl6.result --- mariadb-10.11.11/mysql-test/main/join_nested_jcl6.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/join_nested_jcl6.result 2025-05-19 16:14:24.000000000 +0000 @@ -2060,6 +2060,18 @@ DROP TABLE t1, t2, t3; set join_cache_level= @save_join_cache_level; # end of 10.3 tests +# +# MDEV-32084: Assertion in best_extension_by_limited_search(), or crash elsewhere in release +# +CREATE TABLE t1 (i int); +INSERT INTO t1 values (1),(2); +SELECT 1 FROM t1 WHERE i IN +(SELECT 1 FROM t1 c +LEFT JOIN (t1 a LEFT JOIN t1 b ON t1.i = b.i) ON c.i = t1.i); +1 +1 +DROP TABLE t1; +# end of 10.11 tests CREATE TABLE t5 (a int, b int, c int, PRIMARY KEY(a), KEY b_i (b)); CREATE TABLE t6 (a int, b int, c int, PRIMARY KEY(a), KEY b_i (b)); CREATE TABLE t7 (a int, b int, c int, PRIMARY KEY(a), 
KEY b_i (b)); diff -Nru mariadb-10.11.11/mysql-test/main/large_pages.opt mariadb-10.11.13/mysql-test/main/large_pages.opt --- mariadb-10.11.11/mysql-test/main/large_pages.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/large_pages.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1 +1 @@ ---large-pages +--large-pages --loose-innodb-buffer-pool-size-max=16m diff -Nru mariadb-10.11.11/mysql-test/main/large_pages.result mariadb-10.11.13/mysql-test/main/large_pages.result --- mariadb-10.11.11/mysql-test/main/large_pages.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/large_pages.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ call mtr.add_suppression("\\[Warning\\] (mysqld|mariadbd): Couldn't allocate [0-9]+ bytes \\((Large/HugeTLB memory|MEMLOCK) page size [0-9]+\\).*"); +call mtr.add_suppression("\\[ERROR\\]*Lock Pages in memory access rights required.*"); create table t1 ( a int not null auto_increment, b char(16) not null, diff -Nru mariadb-10.11.11/mysql-test/main/large_pages.test mariadb-10.11.13/mysql-test/main/large_pages.test --- mariadb-10.11.11/mysql-test/main/large_pages.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/large_pages.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,11 +1,9 @@ # Test of large pages (or at least the fallback to conventional allocation) -# Windows needs SeLockMemoryPrivilege ---source include/not_windows.inc --source include/have_innodb.inc call mtr.add_suppression("\\[Warning\\] (mysqld|mariadbd): Couldn't allocate [0-9]+ bytes \\((Large/HugeTLB memory|MEMLOCK) page size [0-9]+\\).*"); - +call mtr.add_suppression("\\[ERROR\\]*Lock Pages in memory access rights required.*"); create table t1 ( a int not null auto_increment, b char(16) not null, diff -Nru mariadb-10.11.11/mysql-test/main/long_unique.result mariadb-10.11.13/mysql-test/main/long_unique.result --- mariadb-10.11.11/mysql-test/main/long_unique.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/long_unique.result 2025-05-19 16:14:24.000000000 +0000 @@ -1452,4 +1452,26 @@ # CREATE TABLE t1 (pk INT, a TEXT NOT NULL DEFAULT '', PRIMARY KEY (pk), b INT AUTO_INCREMENT, UNIQUE(b), UNIQUE (a,b)) ENGINE=myisam; ERROR HY000: AUTO_INCREMENT column `b` cannot be used in the UNIQUE index `a` +# +# MDEV-35620 UBSAN: runtime error: applying zero offset to null pointer in _ma_unique_hash, skip_trailing_space, my_hash_sort_mb_nopad_bin and my_strnncollsp_utf8mb4_bin +# +# Disable result log. The exact result is not important. +# We just need to make sure UBSAN nullptr-with-offset is not reported. +SELECT DISTINCT user,authentication_string FROM mysql.user; +SELECT DISTINCT USER,PASSWORD FROM mysql.user; +SELECT DISTINCT USER,plugin FROM mysql.user; +# Enabling result log again. 
+create or replace table t1 (t text) engine=aria; +insert into t1 values (''); +insert into t1 values (NULL); +select distinct t from t1; +t + +NULL +alter table t1 ENGINE=MyISAM; +select distinct t from t1; +t + +NULL +DROP TABLE t1; # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/long_unique.test mariadb-10.11.13/mysql-test/main/long_unique.test --- mariadb-10.11.11/mysql-test/main/long_unique.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/long_unique.test 2025-05-19 16:14:24.000000000 +0000 @@ -551,4 +551,26 @@ --error ER_NO_AUTOINCREMENT_WITH_UNIQUE CREATE TABLE t1 (pk INT, a TEXT NOT NULL DEFAULT '', PRIMARY KEY (pk), b INT AUTO_INCREMENT, UNIQUE(b), UNIQUE (a,b)) ENGINE=myisam; +--echo # +--echo # MDEV-35620 UBSAN: runtime error: applying zero offset to null pointer in _ma_unique_hash, skip_trailing_space, my_hash_sort_mb_nopad_bin and my_strnncollsp_utf8mb4_bin +--echo # + +--echo # Disable result log. The exact result is not important. +--echo # We just need to make sure UBSAN nullptr-with-offset is not reported. +--disable_result_log +SELECT DISTINCT user,authentication_string FROM mysql.user; +SELECT DISTINCT USER,PASSWORD FROM mysql.user; +SELECT DISTINCT USER,plugin FROM mysql.user; +--enable_result_log +--echo # Enabling result log again. + +create or replace table t1 (t text) engine=aria; +insert into t1 values (''); +insert into t1 values (NULL); +select distinct t from t1; +alter table t1 ENGINE=MyISAM; +select distinct t from t1; +DROP TABLE t1; + + --echo # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/lowercase_table2.result mariadb-10.11.13/mysql-test/main/lowercase_table2.result --- mariadb-10.11.11/mysql-test/main/lowercase_table2.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/lowercase_table2.result 2025-05-19 16:14:24.000000000 +0000 @@ -185,7 +185,7 @@ select TABLE_SCHEMA,TABLE_NAME FROM information_schema.TABLES where TABLE_SCHEMA ='mysqltest_LC2'; TABLE_SCHEMA TABLE_NAME -mysqltest_lc2 myUC +mysqltest_LC2 myUC use test; drop database mysqltest_LC2; # diff -Nru mariadb-10.11.11/mysql-test/main/lowercase_view.result mariadb-10.11.13/mysql-test/main/lowercase_view.result --- mariadb-10.11.11/mysql-test/main/lowercase_view.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/lowercase_view.result 2025-05-19 16:14:24.000000000 +0000 @@ -16,29 +16,17 @@ create view v2aA as select * from v1aA; create view v3Aa as select v2Aa.col1 from v2aA,t2Aa where v2Aa.col1 = t2aA.col1; insert into v2Aa values ((select max(col1) from v1aA)); -ERROR HY000: The definition of table 'v1aA' prevents operation INSERT on table 'v2Aa' insert into t1aA values ((select max(col1) from v1Aa)); -ERROR HY000: The definition of table 'v1Aa' prevents operation INSERT on table 't1aA' insert into v2aA values ((select max(col1) from v1aA)); -ERROR HY000: The definition of table 'v1aA' prevents operation INSERT on table 'v2aA' insert into v2Aa values ((select max(col1) from t1Aa)); -ERROR HY000: The definition of table 'v2Aa' prevents operation INSERT on table 'v2Aa' insert into t1aA values ((select max(col1) from t1Aa)); -ERROR HY000: Table 't1aA' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into v2aA values ((select max(col1) from t1aA)); -ERROR HY000: The definition of table 'v2aA' prevents operation INSERT on table 'v2aA' insert into v2Aa values ((select max(col1) from v2aA)); -ERROR HY000: Table 'v2Aa' is specified twice, both as a 
target for 'INSERT' and as a separate source for data insert into t1Aa values ((select max(col1) from v2Aa)); -ERROR HY000: The definition of table 'v2Aa' prevents operation INSERT on table 't1Aa' insert into v2aA values ((select max(col1) from v2Aa)); -ERROR HY000: Table 'v2aA' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into v3Aa (col1) values ((select max(col1) from v1Aa)); -ERROR HY000: The definition of table 'v1Aa' prevents operation INSERT on table 'v3Aa' insert into v3aA (col1) values ((select max(col1) from t1aA)); -ERROR HY000: The definition of table 'v3aA' prevents operation INSERT on table 'v3aA' insert into v3Aa (col1) values ((select max(col1) from v2aA)); -ERROR HY000: The definition of table 'v2aA' prevents operation INSERT on table 'v3Aa' drop view v3aA,v2Aa,v1aA; drop table t1Aa,t2Aa; create table t1Aa (col1 int); diff -Nru mariadb-10.11.11/mysql-test/main/lowercase_view.test mariadb-10.11.13/mysql-test/main/lowercase_view.test --- mariadb-10.11.11/mysql-test/main/lowercase_view.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/lowercase_view.test 2025-05-19 16:14:24.000000000 +0000 @@ -23,29 +23,17 @@ create view v1Aa as select * from t1aA; create view v2aA as select * from v1aA; create view v3Aa as select v2Aa.col1 from v2aA,t2Aa where v2Aa.col1 = t2aA.col1; --- error 1443 insert into v2Aa values ((select max(col1) from v1aA)); --- error 1443 insert into t1aA values ((select max(col1) from v1Aa)); --- error 1443 insert into v2aA values ((select max(col1) from v1aA)); --- error 1443 insert into v2Aa values ((select max(col1) from t1Aa)); --- error 1093 insert into t1aA values ((select max(col1) from t1Aa)); --- error 1443 insert into v2aA values ((select max(col1) from t1aA)); --- error 1093 insert into v2Aa values ((select max(col1) from v2aA)); --- error 1443 insert into t1Aa values ((select max(col1) from v2Aa)); --- error 1093 insert into v2aA values ((select max(col1) from v2Aa)); --- error 1443 insert into v3Aa (col1) values ((select max(col1) from v1Aa)); --- error 1443 insert into v3aA (col1) values ((select max(col1) from t1aA)); --- error 1443 insert into v3Aa (col1) values ((select max(col1) from v2aA)); drop view v3aA,v2Aa,v1aA; drop table t1Aa,t2Aa; diff -Nru mariadb-10.11.11/mysql-test/main/mariadb-upgrade-service.result mariadb-10.11.13/mysql-test/main/mariadb-upgrade-service.result --- mariadb-10.11.11/mysql-test/main/mariadb-upgrade-service.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mariadb-upgrade-service.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,35 @@ +use mysql; +# run mysql_install_db with --service parameter +# Start service +# -- Upgrade service (online) -- +Phase 1/10: Stopping service +Phase 2/10: Start and stop server in the old version, to avoid crash recovery (skipped) +Phase 3/10: Fixing server config file +Phase 4/10: Starting mysqld for upgrade +Phase 5/10: Waiting for startup to complete +Phase 6/10: Running mysql_upgrade +Phase 7/10: Changing service configuration +Phase 8/10: Initiating server shutdown +Phase 9/10: Waiting for shutdown to complete +Phase 10/10: Starting service +Service 'SERVICE_NAME' successfully upgraded. 
+Log file is written to UPGRADE_LOG +# upgrade_success(online)=1 +# Service stopped +# -- Upgrade service (offline) -- +Phase 1/10: Stopping service +Phase 2/10: Start and stop server in the old version, to avoid crash recovery ,this can take some time +Phase 3/10: Fixing server config file +Phase 4/10: Starting mysqld for upgrade +Phase 5/10: Waiting for startup to complete +Phase 6/10: Running mysql_upgrade +Phase 7/10: Changing service configuration +Phase 8/10: Initiating server shutdown +Phase 9/10: Waiting for shutdown to complete +Phase 10/10: Starting service (skipped) +Service 'SERVICE_NAME' successfully upgraded. +Log file is written to UPGRADE_LOG +# upgrade_success(offline)=1 +# Delete service +connection default; +# restart diff -Nru mariadb-10.11.11/mysql-test/main/mariadb-upgrade-service.test mariadb-10.11.13/mysql-test/main/mariadb-upgrade-service.test --- mariadb-10.11.11/mysql-test/main/mariadb-upgrade-service.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mariadb-upgrade-service.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,113 @@ +source include/windows.inc; +let $datadir_name=data; +let $service_name_prefix=mariadb; +let $password=password; + +source include/check_windows_admin.inc; + +# The test uses return code from sc.exe utility, which are as follows +let $ERROR_SERVICE_DOES_NOT_EXIST= 1060; +let $ERROR_SERVICE_CANNOT_ACCEPT_CTRL=1061;# intermediate, during start or stop +let $ERROR_SERVICE_NOT_ACTIVE=1062;# service stopped +let $ERROR_INVALID_SERVICE_CONTROL=1052; # The requested control is not valid for this service + +let $sc_exe= C:\Windows\System32\sc.exe; +let $ddir= $MYSQLTEST_VARDIR/tmp/$datadir_name; +let $service_name=$service_name_prefix$MASTER_MYPORT; +let TMP= $MYSQLTEST_VARDIR/tmp; +let $upgrade_log=$TMP/mysql_upgrade_service.$service_name.log; + +use mysql; +error 0,1; +rmdir $ddir; + +--disable_result_log +error 0,$ERROR_SERVICE_DOES_NOT_EXIST; +exec $sc_exe delete $service_name; +--enable_result_log + +source include/shutdown_mysqld.inc; +echo # run mysql_install_db with --service parameter; +--disable_result_log +exec $MYSQL_INSTALL_DB_EXE --datadir=$ddir --port=$MASTER_MYPORT --password=$password --service=$service_name --verbose-bootstrap -R; +--enable_result_log + +echo # Start service; +--disable_result_log +exec $sc_exe start $service_name; +--enable_result_log + +enable_reconnect; +source include/wait_until_connected_again.inc; +disable_reconnect; + +echo # -- Upgrade service (online) --; +--replace_result $upgrade_log UPGRADE_LOG $service_name SERVICE_NAME +let $sys_errno=0; +let $upgrade_success = 1; +error 0,1; +exec $MARIADB_UPGRADE_SERVICE_EXE --service=$service_name; + +if($sys_errno != 0) +{ + let $upgrade_success = 0; +} + +echo # upgrade_success(online)=$upgrade_success; +file_exists $upgrade_log; +if ($upgrade_success == 0) +{ + echo --detailed error(online upgrade)--; + cat_file $upgrade_log; +} +# stop service +--disable_result_log +# Wait until stopped +let $sys_errno=0; +while($sys_errno != $ERROR_SERVICE_NOT_ACTIVE) +{ + --error 0,$ERROR_SERVICE_CANNOT_ACCEPT_CTRL,$ERROR_SERVICE_NOT_ACTIVE, $ERROR_INVALID_SERVICE_CONTROL + exec $sc_exe stop $service_name; + if($sys_errno != $ERROR_SERVICE_NOT_ACTIVE) + { + --real_sleep 0.1 + } +} +--enable_result_log +echo # Service stopped; + +echo # -- Upgrade service (offline) --; +--replace_result $upgrade_log UPGRADE_LOG $service_name SERVICE_NAME +let $sys_errno=0; +let $upgrade_success = 1; +error 0,1; +exec $MARIADB_UPGRADE_SERVICE_EXE 
--service=$service_name; + +if($sys_errno != 0) +{ + let $upgrade_success = 0; +} + +echo # upgrade_success(offline)=$upgrade_success; +file_exists $upgrade_log; +if ($upgrade_success == 0) +{ + echo --detailed error(offline upgrade)--; + cat_file $upgrade_log; +} + +echo # Delete service; +let $sys_errno=0; +--disable_result_log +exec $sc_exe delete $service_name; +--enable_result_log + +# Cleanup +source include/wait_until_disconnected.inc; +rmdir $ddir; +remove_file $upgrade_log; +let TEMP=$old_temp; + +#restart original server +connection default; +source include/start_mysqld.inc; diff -Nru mariadb-10.11.11/mysql-test/main/mdev-35721-ubsan.result mariadb-10.11.13/mysql-test/main/mdev-35721-ubsan.result --- mariadb-10.11.11/mysql-test/main/mdev-35721-ubsan.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mdev-35721-ubsan.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,21 @@ +CREATE TABLE t (c1 VARCHAR(10),c2 VARCHAR(10),PRIMARY KEY(c1,c2),FULLTEXT KEY k (c2)) ENGINE=InnoDB; +INSERT INTO t VALUES ('a','b'); +DROP TABLE t; +CREATE TABLE t (c1 VARCHAR(10),c2 VARCHAR(10),PRIMARY KEY(c1,c2),FULLTEXT KEY k (c2)) ENGINE=InnoDB; +DELETE FROM t; +DROP TABLE t; +CREATE TABLE t (a INT(1),d INT(1),b VARCHAR(1),c CHAR(1),c3 INT(1) GENERATED ALWAYS AS ((a + LENGTH (d))) STORED,c2 CHAR(1) GENERATED ALWAYS AS (SUBSTR(b,0,0)) VIRTUAL,k1 CHAR(1) GENERATED ALWAYS AS (SUBSTR(b,0,0)) VIRTUAL,PRIMARY KEY(b (1),a,d),KEY d (d),KEY a (a),KEY c_renamed (c (1),b (1)),KEY b (b (1),c (1),a),KEY k1 (k1),KEY a_2 (a,k1),KEY k1_2 (k1,d)) DEFAULT CHARSET=latin1 ENGINE=InnoDB; +DELETE FROM t; +DROP TABLE t; +CREATE TABLE t (a INT,ROW_START TIMESTAMP(6) AS ROW START,ROW_END TIMESTAMP(6) AS ROW END,PERIOD FOR SYSTEM_TIME(ROW_START,ROW_END),INDEX (ROW_START),INDEX (ROW_END),PRIMARY KEY(ROW_END,a,ROW_START),INDEX (ROW_END,ROW_START,a)) WITH SYSTEM VERSIONING ENGINE=InnoDB; +SHOW INDEX FROM t; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment Ignored +t 0 PRIMARY 1 ROW_END A 0 NULL NULL BTREE NO +t 0 PRIMARY 2 a A 0 NULL NULL BTREE NO +t 0 PRIMARY 3 ROW_START A 0 NULL NULL BTREE NO +t 1 ROW_START 1 ROW_START A 0 NULL NULL BTREE NO +t 1 ROW_END 1 ROW_END A 0 NULL NULL BTREE NO +t 1 ROW_END_2 1 ROW_END A 0 NULL NULL BTREE NO +t 1 ROW_END_2 2 ROW_START A 0 NULL NULL BTREE NO +t 1 ROW_END_2 3 a A 0 NULL NULL BTREE NO +DROP TABLE t; diff -Nru mariadb-10.11.11/mysql-test/main/mdev-35721-ubsan.test mariadb-10.11.13/mysql-test/main/mdev-35721-ubsan.test --- mariadb-10.11.11/mysql-test/main/mdev-35721-ubsan.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mdev-35721-ubsan.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,22 @@ + +--source include/have_innodb.inc + +CREATE TABLE t (c1 VARCHAR(10),c2 VARCHAR(10),PRIMARY KEY(c1,c2),FULLTEXT KEY k (c2)) ENGINE=InnoDB; +INSERT INTO t VALUES ('a','b'); + +DROP TABLE t; + +CREATE TABLE t (c1 VARCHAR(10),c2 VARCHAR(10),PRIMARY KEY(c1,c2),FULLTEXT KEY k (c2)) ENGINE=InnoDB; +DELETE FROM t; + +DROP TABLE t; + +CREATE TABLE t (a INT(1),d INT(1),b VARCHAR(1),c CHAR(1),c3 INT(1) GENERATED ALWAYS AS ((a + LENGTH (d))) STORED,c2 CHAR(1) GENERATED ALWAYS AS (SUBSTR(b,0,0)) VIRTUAL,k1 CHAR(1) GENERATED ALWAYS AS (SUBSTR(b,0,0)) VIRTUAL,PRIMARY KEY(b (1),a,d),KEY d (d),KEY a (a),KEY c_renamed (c (1),b (1)),KEY b (b (1),c (1),a),KEY k1 (k1),KEY a_2 (a,k1),KEY k1_2 (k1,d)) DEFAULT CHARSET=latin1 ENGINE=InnoDB; +DELETE FROM t; + +DROP TABLE t; + +CREATE TABLE t
(a INT,ROW_START TIMESTAMP(6) AS ROW START,ROW_END TIMESTAMP(6) AS ROW END,PERIOD FOR SYSTEM_TIME(ROW_START,ROW_END),INDEX (ROW_START),INDEX (ROW_END),PRIMARY KEY(ROW_END,a,ROW_START),INDEX (ROW_END,ROW_START,a)) WITH SYSTEM VERSIONING ENGINE=InnoDB; +SHOW INDEX FROM t; + +DROP TABLE t; diff -Nru mariadb-10.11.11/mysql-test/main/mdl_sync.result mariadb-10.11.13/mysql-test/main/mdl_sync.result --- mariadb-10.11.11/mysql-test/main/mdl_sync.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mdl_sync.result 2025-05-19 16:14:24.000000000 +0000 @@ -2431,9 +2431,6 @@ create table t2 (a int) stats_persistent=0, engine=innodb; insert into t1 values (1); insert into t2 values (1); -connect con1, localhost, root; -start transaction with consistent snapshot; -connection default; SET DEBUG_SYNC= 'after_open_table_mdl_shared SIGNAL table_opened WAIT_FOR grlwait execute 2'; update t1,t2 set t1.a=2,t2.a=3; connection con2; @@ -2456,6 +2453,7 @@ SET DEBUG_SYNC= 'now WAIT_FOR table_opened'; SET DEBUG_SYNC= 'mdl_acquire_lock_wait SIGNAL grlwait'; FLUSH TABLES WITH READ LOCK; +InnoDB 0 transactions not purged SELECT LOCK_MODE, LOCK_TYPE, TABLE_SCHEMA, TABLE_NAME FROM information_schema.metadata_lock_info; LOCK_MODE LOCK_TYPE TABLE_SCHEMA TABLE_NAME MDL_BACKUP_FTWRL2 Backup lock @@ -2465,7 +2463,6 @@ SET DEBUG_SYNC= 'RESET'; drop table t1,t2; disconnect con2; -disconnect con1; # # Bug#50786 Assertion `thd->mdl_context.trans_sentinel() == __null' # failed in open_ltable() diff -Nru mariadb-10.11.11/mysql-test/main/mdl_sync.test mariadb-10.11.13/mysql-test/main/mdl_sync.test --- mariadb-10.11.11/mysql-test/main/mdl_sync.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mdl_sync.test 2025-05-19 16:14:24.000000000 +0000 @@ -3115,12 +3115,6 @@ insert into t1 values (1); insert into t2 values (1); -connect (con1, localhost, root); -# disable innodb purge thread, otherwise it might start purging t2, -# and will take an mdl, affecting metadata_lock_info output. 
-start transaction with consistent snapshot; -connection default; - SET DEBUG_SYNC= 'after_open_table_mdl_shared SIGNAL table_opened WAIT_FOR grlwait execute 2'; --send update t1,t2 set t1.a=2,t2.a=3 @@ -3156,6 +3150,7 @@ let $wait_condition= SELECT COUNT(*)=1 FROM information_schema.metadata_lock_info; --source include/wait_condition.inc +--source ../suite/innodb/include/wait_all_purged.inc SELECT LOCK_MODE, LOCK_TYPE, TABLE_SCHEMA, TABLE_NAME FROM information_schema.metadata_lock_info; unlock tables; @@ -3166,7 +3161,6 @@ SET DEBUG_SYNC= 'RESET'; drop table t1,t2; disconnect con2; -disconnect con1; --echo # --echo # Bug#50786 Assertion `thd->mdl_context.trans_sentinel() == __null' diff -Nru mariadb-10.11.11/mysql-test/main/merge.result mariadb-10.11.13/mysql-test/main/merge.result --- mariadb-10.11.11/mysql-test/main/merge.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/merge.result 2025-05-19 16:14:24.000000000 +0000 @@ -3678,33 +3678,22 @@ insert into t1 (a) values (1); insert into t3 (b) values (1); insert into m1 (a) values ((select max(a) from m1)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from m2)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from t1)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from t2)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from t3, m1)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from t3, m2)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from t3, t1)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from t3, t2)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from tmp, m1)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from tmp, m2)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from tmp, t1)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from tmp, t2)); -ERROR HY000: Table 'm1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into m1 (a) values ((select max(a) from v1)); -ERROR HY000: The definition of table 'v1' prevents operation INSERT on table 'm1' insert into m1 (a) values ((select max(a) from tmp, v1)); -ERROR HY000: The definition of table 'v1' prevents operation INSERT on table 'm1' +select count(*) from m1; +count(*) +15 drop view v1; drop temporary table tmp; drop table t1, t2, t3, m1, m2; diff -Nru mariadb-10.11.11/mysql-test/main/merge.test mariadb-10.11.13/mysql-test/main/merge.test --- 
mariadb-10.11.11/mysql-test/main/merge.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/merge.test 2025-05-19 16:14:24.000000000 +0000 @@ -2670,37 +2670,24 @@ insert into t1 (a) values (1); insert into t3 (b) values (1); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from m1)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from m2)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from t1)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from t2)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from t3, m1)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from t3, m2)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from t3, t1)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from t3, t2)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from tmp, m1)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from tmp, m2)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from tmp, t1)); ---error ER_UPDATE_TABLE_USED insert into m1 (a) values ((select max(a) from tmp, t2)); - ---error ER_VIEW_PREVENT_UPDATE + insert into m1 (a) values ((select max(a) from v1)); ---error ER_VIEW_PREVENT_UPDATE insert into m1 (a) values ((select max(a) from tmp, v1)); +select count(*) from m1; drop view v1; diff -Nru mariadb-10.11.11/mysql-test/main/multi_update.result mariadb-10.11.13/mysql-test/main/multi_update.result --- mariadb-10.11.11/mysql-test/main/multi_update.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/multi_update.result 2025-05-19 16:14:24.000000000 +0000 @@ -1389,3 +1389,23 @@ 12 5 8 drop table t1,t2,t3,t; # End of 10.4 tests +# +# MDEV-31647 Stack looping and SIGSEGV in Item_args::walk_args on UPDATE +# +create table t1 (c int, c2 int) engine=innodb; +update t1 set c=0 where c=( +select 1 from (select 1 as v1) as v2 +natural join t1) order by last_value (c2) over (order by c2); +ERROR HY000: Invalid use of group function +update t1 set c=0 where c=( +select 1 from (select 1 as v1) as v2 +natural join t1) order by last_value (c2) over (); +ERROR HY000: Invalid use of group function +update t1 set c=0 where c=( +select 1 from (select 1 as v1) as v2 +natural join t1) order by c2; +select 1 from (select 1 as v1) as v2 +natural join t1 order by last_value (c2) over (order by c2); +1 +drop table t1; +# End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/multi_update.test mariadb-10.11.13/mysql-test/main/multi_update.test --- mariadb-10.11.11/mysql-test/main/multi_update.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/multi_update.test 2025-05-19 16:14:24.000000000 +0000 @@ -1200,3 +1200,31 @@ drop table t1,t2,t3,t; --echo # End of 10.4 tests + +--echo # +--echo # MDEV-31647 Stack looping and SIGSEGV in Item_args::walk_args on UPDATE +--echo # +--source include/have_innodb.inc +create table t1 (c int, c2 int) engine=innodb; + +--error ER_INVALID_GROUP_FUNC_USE +update t1 set c=0 where c=( + select 1 from (select 1 as v1) as v2 + natural join t1) order by last_value (c2) over (order by c2); + +--error ER_INVALID_GROUP_FUNC_USE +update t1 set c=0 where c=( + select 1 from (select 1 as v1) as v2 + natural join t1) order by last_value (c2) over (); + +update t1 set c=0 where c=( + select 1 from (select 1 as v1) as v2 + natural join t1) order 
by c2; + +select 1 from (select 1 as v1) as v2 + natural join t1 order by last_value (c2) over (order by c2); + + +drop table t1; + +--echo # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/my_getopt_case_insensitive.opt mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.opt --- mariadb-10.11.11/mysql-test/main/my_getopt_case_insensitive.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.opt 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1 @@ +--slOw_QuEry_loG=OFF diff -Nru mariadb-10.11.11/mysql-test/main/my_getopt_case_insensitive.result mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.result --- mariadb-10.11.11/mysql-test/main/my_getopt_case_insensitive.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,8 @@ +# +# MDEV-27126: my_getopt compares option names case sensitively +# +# Check if the variable is set correctly from options +SELECT @@GLOBAL.slow_query_log; +@@GLOBAL.slow_query_log +0 +# End of test. diff -Nru mariadb-10.11.11/mysql-test/main/my_getopt_case_insensitive.test mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.test --- mariadb-10.11.11/mysql-test/main/my_getopt_case_insensitive.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/my_getopt_case_insensitive.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,8 @@ +--echo # +--echo # MDEV-27126: my_getopt compares option names case sensitively +--echo # + +--echo # Check if the variable is set correctly from options +SELECT @@GLOBAL.slow_query_log; + +--echo # End of test. diff -Nru mariadb-10.11.11/mysql-test/main/myisam-big.result mariadb-10.11.13/mysql-test/main/myisam-big.result --- mariadb-10.11.11/mysql-test/main/myisam-big.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/myisam-big.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,7 @@ drop table if exists t1,t2; +call mtr.add_suppression("Index.*try to repair it"); +call mtr.add_suppression("Disk got full"); +call mtr.add_suppression("Got an error from thread_id"); create table t1 (id int, sometext varchar(100)) engine=myisam; insert into t1 values (1, "hello"),(2, "hello2"),(4, "hello3"),(4, "hello4"); create table t2 like t1; @@ -43,4 +46,9 @@ connection con2; disconnect con2; connection default; +SET @saved_dbug = @@SESSION.debug_dbug; +SET debug_dbug='+d,simulate_file_pwrite_error'; +insert into t1 select * from t2; +ERROR HY000: Disk got full writing 'test.t1' (Errcode: 28 "No space left on device") +SET debug_dbug= @saved_dbug; drop table t1,t2; diff -Nru mariadb-10.11.11/mysql-test/main/myisam-big.test mariadb-10.11.13/mysql-test/main/myisam-big.test --- mariadb-10.11.11/mysql-test/main/myisam-big.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/myisam-big.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,12 +1,17 @@ # # Test bugs in the MyISAM code that require more space/time --source include/big_test.inc +--source include/have_debug.inc # Initialise --disable_warnings drop table if exists t1,t2; --enable_warnings +call mtr.add_suppression("Index.*try to repair it"); +call mtr.add_suppression("Disk got full"); +call mtr.add_suppression("Got an error from thread_id"); + # # BUG#925377: # Querying myisam table metadata while 'alter table..enable keys' is @@ -61,4 +66,12 @@ reap; disconnect con2; connection default; + +# +# Test error message from disk full +SET 
@saved_dbug = @@SESSION.debug_dbug; +SET debug_dbug='+d,simulate_file_pwrite_error'; +--error ER_DISK_FULL +insert into t1 select * from t2; +SET debug_dbug= @saved_dbug; drop table t1,t2; diff -Nru mariadb-10.11.11/mysql-test/main/mysql-interactive.result mariadb-10.11.13/mysql-test/main/mysql-interactive.result --- mariadb-10.11.11/mysql-test/main/mysql-interactive.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysql-interactive.result 2025-05-19 16:14:24.000000000 +0000 @@ -4,6 +4,7 @@ delimiter $ select 1; $ +exit Welcome to the MariaDB monitor. Commands end with ; or \g. Your MariaDB connection id is X Server version: Y @@ -21,4 +22,5 @@ +---+ 1 row in set -MariaDB [(none)]> \ No newline at end of file +MariaDB [(none)]> exit +Bye diff -Nru mariadb-10.11.11/mysql-test/main/mysql-interactive.test mariadb-10.11.13/mysql-test/main/mysql-interactive.test --- mariadb-10.11.11/mysql-test/main/mysql-interactive.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysql-interactive.test 2025-05-19 16:14:24.000000000 +0000 @@ -6,23 +6,16 @@ # this would need an instrumented ncurses library source include/not_msan.inc; -error 0,1; -exec $MYSQL -V|grep -q readline; -if ($sys_errno == 1) -{ - # strangely enough - skip does not work with libedit; -} - write_file $MYSQL_TMP_DIR/mysql_in; delimiter $ select 1; $ +exit EOF let TERM=dumb; replace_regex /id is \d+/id is X/ /Server version: .*/Server version: Y/ / \(\d+\.\d+ sec\)//; error 0,127; -exec socat EXEC:"$MYSQL",pty STDIO < $MYSQL_TMP_DIR/mysql_in; +exec socat -t10 EXEC:"$MYSQL",pty STDIO < $MYSQL_TMP_DIR/mysql_in; if ($sys_errno == 127) { remove_file $MYSQL_TMP_DIR/mysql_in; diff -Nru mariadb-10.11.11/mysql-test/main/mysql_upgrade-34014.result mariadb-10.11.13/mysql-test/main/mysql_upgrade-34014.result --- mariadb-10.11.11/mysql-test/main/mysql_upgrade-34014.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysql_upgrade-34014.result 2025-05-19 16:14:24.000000000 +0000 @@ -12,6 +12,8 @@ SHOW CREATE DATABASE sys; Database Create Database sys CREATE DATABASE `sys` /*!40100 DEFAULT CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_ci */ +Warnings: +Note 1105 Database 'sys' does not have a db.opt file. You can create one with ALTER DATABASE if needed Phase 1/8: Checking and upgrading mysql database Processing databases mysql diff -Nru mariadb-10.11.11/mysql-test/main/mysql_upgrade.result mariadb-10.11.13/mysql-test/main/mysql_upgrade.result --- mariadb-10.11.11/mysql-test/main/mysql_upgrade.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysql_upgrade.result 2025-05-19 16:14:24.000000000 +0000 @@ -151,7 +151,8 @@ Phase 8/8: Running 'FLUSH PRIVILEGES' OK Run it again - should say already completed -This installation of MariaDB is already upgraded to VERSION.There is no need to run mysql_upgrade again for VERSION. +This installation of MariaDB is already upgraded to X.Y.Z-MariaDB. +There is no need to run mysql_upgrade again. You can use --force if you still want to run mysql_upgrade Force should run it regardless of whether it has been run before Phase 1/8: Checking and upgrading mysql database @@ -1911,11 +1912,11 @@ # # MDEV-27279: mariadb_upgrade add --check-if-upgrade-is-needed # -This installation of MariaDB is already upgraded to MariaDB . -There is no need to run mysql_upgrade again for MariaDB . +This installation of MariaDB is already upgraded to X.Y.Z-MariaDB. +There is no need to run mysql_upgrade again. 
Looking for 'mariadb' as: mariadb -This installation of MariaDB is already upgraded to MariaDB . -There is no need to run mysql_upgrade again for MariaDB . +This installation of MariaDB is already upgraded to X.Y.Z-MariaDB. +There is no need to run mysql_upgrade again. # # MDEV-27279: mariadb_upgrade check-if-upgrade absence is do it # @@ -1925,17 +1926,17 @@ # MDEV-27279: mariadb_upgrade check-if-upgrade with minor version change # Looking for 'mariadb' as: mariadb -This installation of MariaDB is already upgraded to MariaDB . -There is no need to run mysql_upgrade again for MariaDB . -This installation of MariaDB is already upgraded to MariaDB . -There is no need to run mysql_upgrade again for MariaDB . +This installation of MariaDB is already upgraded to X.Y.0-MariaDB. +There is no need to run mysql_upgrade again for X.Y.Z-MariaDB, because they're both X.Y. +This installation of MariaDB is already upgraded to X.Y.0-MariaDB. +There is no need to run mysql_upgrade again for X.Y.Z-MariaDB, because they're both X.Y. You can use --force if you still want to run mysql_upgrade # # MDEV-27279: mariadb_upgrade check-if-upgrade with major version change # -Major version upgrade detected from MariaDB to MariaDB . Check required! +Major version upgrade detected from X.0.99 to X.Y.Z-MariaDB. Check required! Looking for 'mysql' as: mysql -Major version upgrade detected from MariaDB to MariaDB . Check required! +Major version upgrade detected from X.0.99 to X.Y.Z-MariaDB. Check required! drop table mysql.global_priv; rename table mysql.global_priv_bak to mysql.global_priv; # End of 10.2 tests diff -Nru mariadb-10.11.11/mysql-test/main/mysql_upgrade.test mariadb-10.11.13/mysql-test/main/mysql_upgrade.test --- mariadb-10.11.11/mysql-test/main/mysql_upgrade.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysql_upgrade.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,8 +1,12 @@ +--source include/long_test.inc -- source include/mysql_upgrade_preparation.inc -- source include/have_working_dns.inc -- source include/have_innodb.inc -- source include/have_partition.inc --- source include/no_valgrind_without_big.inc + +let majorminor=`select substring_index(version(), '.', 2)`; +# for major upgrade test, see below +let major=`select substring_index(version(), '.', 1) - (version() like '%.0.%')`; set sql_mode=""; @@ -19,7 +23,7 @@ file_exists $MYSQLD_DATADIR/mysql_upgrade_info; --echo Run it again - should say already completed ---replace_regex /upgraded to [^\n].*/upgraded to VERSION./ /again for [^\n]*/again for VERSION./ +--replace_result $MYSQL_SERVER_VERSION X.Y.Z-MariaDB --exec $MYSQL_UPGRADE 2>&1 # It should have created a file in the MySQL Servers datadir @@ -289,10 +293,11 @@ --error 1 --exec $MYSQL_UPGRADE --check-if-upgrade-is-needed --silent ---replace_regex /\d\d\.\d*\.\d*[^ .\n]*/MariaDB / +--replace_result $MYSQL_SERVER_VERSION X.Y.Z-MariaDB --error 1 --exec $MYSQL_UPGRADE --check-if-upgrade-is-needed ---replace_regex /\d\d\.\d*\.\d*[^ .\n]*/MariaDB / /'mariadb.* as:[^\n]*/'mariadb' as: mariadb/ +--replace_result $MYSQL_SERVER_VERSION X.Y.Z-MariaDB +--replace_regex /'mariadb.* as:[^\n]*/'mariadb' as: mariadb/ --error 1 --exec $MYSQL_UPGRADE --check-if-upgrade-is-needed --verbose @@ -320,16 +325,18 @@ my $file= $ENV{'DATADIR'} or die "MYSQLD_DATADIR not set"; $ver =~ s/^(\d*)\.(\d*).(\d*)(.*)/$1.$2.0$4/; open(FILE, ">$file/mysql_upgrade_info") or die "Failed to open $file"; + binmode FILE; print FILE "$ver\n"; close(FILE); EOF --error 1 --exec $MYSQL_UPGRADE 
--check-if-upgrade-is-needed --silent ---replace_regex /\d\d\.\d*\.\d*[^ .\n]*/MariaDB / /'mariadb.* as:[^\n]*/'mariadb' as: mariadb/ +--replace_result $MYSQL_SERVER_VERSION X.Y.Z-MariaDB $majorminor X.Y +--replace_regex /'mariadb.* as:[^\n]*/'mariadb' as: mariadb/ --error 1 --exec $MYSQL_UPGRADE --check-if-upgrade-is-needed --verbose ---replace_regex /\d\d\.\d*\.\d*[^ .\n]*/MariaDB / +--replace_result $MYSQL_SERVER_VERSION X.Y.Z-MariaDB $majorminor X.Y --exec $MYSQL_UPGRADE --remove_file $MYSQLD_DATADIR/mysql_upgrade_info @@ -344,16 +351,18 @@ perl; my $ver= $ENV{'MYSQL_SERVER_VERSION'} or die "MYSQL_SERVER_VERSION not set"; my $file= $ENV{'DATADIR'} or die "MYSQLD_DATADIR not set"; - $ver =~ s/^(\d*)\.(\d*).(\d*)(.*)/$1.0.$3$4/; + $ver =~ s/^(\d*)\.(\d*).(\d*)(.*)/$1.0.99/; open(FILE, ">$file/mysql_upgrade_info") or die "Failed to open $file"; + binmode FILE; print FILE "$ver\n"; close(FILE); EOF --exec $MYSQL_UPGRADE --check-if-upgrade-is-needed --silent ---replace_regex /\d\d\.\d*\.\d*[^ .\n]*/MariaDB / +--replace_result $MYSQL_SERVER_VERSION X.Y.Z-MariaDB $major X --exec $MYSQL_UPGRADE --check-if-upgrade-is-needed ---replace_regex /\d\d\.\d*\.\d*[^ .\n]*/MariaDB / /'mariadb.* as:[^\n]*/'mysql' as: mysql/ +--replace_result $MYSQL_SERVER_VERSION X.Y.Z-MariaDB $major X +--replace_regex /'mariadb.* as:[^\n]*/'mysql' as: mysql/ --exec $MYSQL_UPGRADE --check-if-upgrade-is-needed --verbose --remove_file $MYSQLD_DATADIR/mysql_upgrade_info drop table mysql.global_priv; diff -Nru mariadb-10.11.11/mysql-test/main/mysqld--help.result mariadb-10.11.13/mysql-test/main/mysqld--help.result --- mariadb-10.11.11/mysql-test/main/mysqld--help.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqld--help.result 2025-05-19 16:14:24.000000000 +0000 @@ -748,7 +748,8 @@ keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in - selectivity_for_indexes. selectivity_multiplier. This + selectivity_for_indexes. fix_derived_table_read_cost = + Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. Use 'ALL' to set all combinations. 
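Note on the mysqld--help.result hunk above: the help text now documents fix_derived_table_read_cost as an additional value of optimizer_adjust_secondary_key_costs. A minimal usage sketch, distilled from the secondary_key_costs test changes later in this diff (assumes a 10.11.13 server and the sequence storage engine for seq_1_to_10000; this sketch is illustrative, not part of the upstream patch):

  -- test table, as in the secondary_key_costs test below
  create table t1 (a int);
  insert into t1 select seq from seq_1_to_10000;
  -- enable the derived-table cost fix for the current session
  set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost';
  -- the materialized derived table now gets a realistic read cost
  -- instead of cost == rows (compare the optimizer traces in the
  -- secondary_key_costs.result hunk below: cost 10000 vs 501)
  explain select * from t1 as t1_base, (select a from t1 limit 10000) as TBL;
  set @@optimizer_adjust_secondary_key_costs=default;
  drop table t1;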
diff -Nru mariadb-10.11.11/mysql-test/main/mysqldump-system.result mariadb-10.11.13/mysql-test/main/mysqldump-system.result --- mariadb-10.11.11/mysql-test/main/mysqldump-system.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqldump-system.result 2025-05-19 16:14:24.000000000 +0000 @@ -650,21 +650,21 @@ /*M!100401 UNINSTALL PLUGIN IF EXIST cleartext_plugin_server */; INSTALL PLUGIN cleartext_plugin_server SONAME 'AUTH_TEST_PLUGIN_LIB'; DELIMITER | -/*M!100101 IF current_user()="'mariadb.sys'@'localhost'" THEN +/*M!100101 IF current_user()='''mariadb.sys''@''localhost''' THEN SIGNAL SQLSTATE '45000' SET MYSQL_ERRNO=30001, MESSAGE_TEXT="Don't remove current user 'mariadb.sys'@'localhost''"; END IF */| DELIMITER ; /*!50701 DROP USER IF EXISTS 'mariadb.sys'@'localhost' */; CREATE /*M!100103 OR REPLACE */ USER `mariadb.sys`@`localhost` PASSWORD EXPIRE; DELIMITER | -/*M!100101 IF current_user()="'root'@'localhost'" THEN +/*M!100101 IF current_user()='''root''@''localhost''' THEN SIGNAL SQLSTATE '45000' SET MYSQL_ERRNO=30001, MESSAGE_TEXT="Don't remove current user 'root'@'localhost''"; END IF */| DELIMITER ; /*!50701 DROP USER IF EXISTS 'root'@'localhost' */; CREATE /*M!100103 OR REPLACE */ USER `root`@`localhost`; DELIMITER | -/*M!100101 IF current_user()="'foobar'@'%'" THEN +/*M!100101 IF current_user()='''foobar''@''%''' THEN SIGNAL SQLSTATE '45000' SET MYSQL_ERRNO=30001, MESSAGE_TEXT="Don't remove current user 'foobar'@'%''"; END IF */| DELIMITER ; diff -Nru mariadb-10.11.11/mysql-test/main/mysqldump.result mariadb-10.11.13/mysql-test/main/mysqldump.result --- mariadb-10.11.11/mysql-test/main/mysqldump.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqldump.result 2025-05-19 16:14:24.000000000 +0000 @@ -6747,6 +6747,39 @@ /*!40101 SET character_set_client = @saved_cs_client */; ERROR at line 9: Not allowed in the sandbox mode drop table t1; +# +# MDEV-36268 mariadb-dump used wrong quoting character +# +create table t1 (a int); +create view `v'1"2` as select * from t1 with check option; +/*M!999999\- enable the sandbox mode */ +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8mb4 */; +CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci; +/*!40101 SET character_set_client = @saved_cs_client */; +SET @saved_cs_client = @@character_set_client; +SET character_set_client = utf8mb4; +/*!50001 CREATE VIEW `v'1"2` AS SELECT + 1 AS `a` */; +SET character_set_client = @saved_cs_client; +/*!50001 DROP VIEW IF EXISTS `v'1"2`*/; +/*!50001 SET @saved_cs_client = @@character_set_client */; +/*!50001 SET @saved_cs_results = @@character_set_results */; +/*!50001 SET @saved_col_connection = @@collation_connection */; +/*!50001 SET character_set_client = utf8mb3 */; +/*!50001 SET character_set_results = utf8mb3 */; +/*!50001 SET collation_connection = utf8mb3_general_ci */; +/*!50001 CREATE ALGORITHM=UNDEFINED */ +/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */ +/*!50001 VIEW `v'1"2` AS select `t1`.`a` AS `a` from `t1` */ +/*!50002 WITH CASCADED CHECK OPTION */; +/*!50001 SET character_set_client = @saved_cs_client */; +/*!50001 SET character_set_results = @saved_cs_results */; +/*!50001 SET collation_connection = @saved_col_connection */; +drop view `v'1"2`; +drop table t1; # End of 10.5 tests # # MDEV-16733 mysqldump --tab and --xml options are conflicting diff -Nru mariadb-10.11.11/mysql-test/main/mysqldump.test 
mariadb-10.11.13/mysql-test/main/mysqldump.test --- mariadb-10.11.11/mysql-test/main/mysqldump.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqldump.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,4 @@ ---source include/no_valgrind_without_big.inc +--source include/long_test.inc --source include/have_utf8mb4.inc call mtr.add_suppression("@003f.frm' \\(errno: 22\\)"); @@ -3029,6 +3029,15 @@ --remove_file $MYSQLTEST_VARDIR/tmp/mdev33727.sql drop table t1; +--echo # +--echo # MDEV-36268 mariadb-dump used wrong quoting character +--echo # +create table t1 (a int); +create view `v'1"2` as select * from t1 with check option; # "' +--exec $MYSQL_DUMP --compact test +drop view `v'1"2`; # "' +drop table t1; + --echo # End of 10.5 tests --echo # diff -Nru mariadb-10.11.11/mysql-test/main/mysqlslap.result mariadb-10.11.13/mysql-test/main/mysqlslap.result --- mariadb-10.11.11/mysql-test/main/mysqlslap.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqlslap.result 2025-05-19 16:14:24.000000000 +0000 @@ -260,3 +260,6 @@ # # Bug MDEV-15789 (Upstream: #80329): MYSQLSLAP OPTIONS --AUTO-GENERATE-SQL-GUID-PRIMARY and --AUTO-GENERATE-SQL-SECONDARY-INDEXES DONT WORK # +# +# Bug MDEV-34621: Fix division by zero in mariadb-slap when iterations=0 +# diff -Nru mariadb-10.11.11/mysql-test/main/mysqlslap.test mariadb-10.11.13/mysql-test/main/mysqlslap.test --- mariadb-10.11.11/mysql-test/main/mysqlslap.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqlslap.test 2025-05-19 16:14:24.000000000 +0000 @@ -88,3 +88,9 @@ --exec $MYSQL_SLAP --concurrency=1 --silent --iterations=1 --number-int-cols=2 --number-char-cols=3 --auto-generate-sql --auto-generate-sql-guid-primary --create-schema=slap --exec $MYSQL_SLAP --concurrency=1 --silent --iterations=1 --number-int-cols=2 --number-char-cols=3 --auto-generate-sql --auto-generate-sql-secondary-indexes=1 --create-schema=slap + +--echo # +--echo # Bug MDEV-34621: Fix division by zero in mariadb-slap when iterations=0 +--echo # + +--exec $MYSQL_SLAP -i0 --only-print diff -Nru mariadb-10.11.11/mysql-test/main/mysqltest.result mariadb-10.11.13/mysql-test/main/mysqltest.result --- mariadb-10.11.11/mysql-test/main/mysqltest.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqltest.result 2025-05-19 16:14:24.000000000 +0000 @@ -989,4 +989,13 @@ foo\"bar foo\"bar set sql_mode=default; +# +# MDEV-29344: engines/iuds.insert_time cannot run with PS protocol (syntax error) +# +SELECT 1 /* doesn't throw error */; +1 +1 +SELECT 1 /* doesn't throw error */; +1 +1 End of tests diff -Nru mariadb-10.11.11/mysql-test/main/mysqltest.test mariadb-10.11.13/mysql-test/main/mysqltest.test --- mariadb-10.11.11/mysql-test/main/mysqltest.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/mysqltest.test 2025-05-19 16:14:24.000000000 +0000 @@ -2954,6 +2954,12 @@ select "foo\""bar"; set sql_mode=default; +--echo # +--echo # MDEV-29344: engines/iuds.insert_time cannot run with PS protocol (syntax error) +--echo # +SELECT 1 /* doesn't throw error */; +SELECT 1 /* doesn't throw error */; + --echo End of tests # Wait till we reached the initial number of concurrent sessions diff -Nru mariadb-10.11.11/mysql-test/main/partition_myisam.result mariadb-10.11.13/mysql-test/main/partition_myisam.result --- mariadb-10.11.11/mysql-test/main/partition_myisam.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/partition_myisam.result 
2025-05-19 16:14:24.000000000 +0000 @@ -259,3 +259,24 @@ Table Op Msg_type Msg_text test.t1 check status OK DROP TABLE t1; +# +# MDEV-31122 Server crash in get_lock_data / mysql_lock_abort_for_thread +# +CREATE TABLE t1 (a INT); +CREATE TABLE t2 (b INT, c varchar(5)) +PARTITION BY RANGE COLUMNS(c) +SUBPARTITION by key(b) SUBPARTITIONS 2 ( +PARTITION p0 VALUES LESS THAN ('m'), +PARTITION p1 VALUES LESS THAN ('z') +); +connect con1,localhost,root,,; +HANDLER t1 OPEN; +SELECT b FROM t2 PARTITION (p0); +connection default; +SET lock_wait_timeout= 1; +ALTER TABLE t1 FORCE; +connection con1; +b +disconnect con1; +connection default; +DROP TABLE t2, t1; diff -Nru mariadb-10.11.11/mysql-test/main/partition_myisam.test mariadb-10.11.13/mysql-test/main/partition_myisam.test --- mariadb-10.11.11/mysql-test/main/partition_myisam.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/partition_myisam.test 2025-05-19 16:14:24.000000000 +0000 @@ -249,3 +249,31 @@ ALTER TABLE `t1` REMOVE PARTITIONING; CHECK TABLE `t1` EXTENDED; DROP TABLE t1; + +--echo # +--echo # MDEV-31122 Server crash in get_lock_data / mysql_lock_abort_for_thread +--echo # +CREATE TABLE t1 (a INT); + +CREATE TABLE t2 (b INT, c varchar(5)) + PARTITION BY RANGE COLUMNS(c) + SUBPARTITION by key(b) SUBPARTITIONS 2 ( + PARTITION p0 VALUES LESS THAN ('m'), + PARTITION p1 VALUES LESS THAN ('z') + ); + +--connect (con1,localhost,root,,) +HANDLER t1 OPEN; +--send + SELECT b FROM t2 PARTITION (p0); + +--connection default +SET lock_wait_timeout= 1; +--error 0,ER_STATEMENT_TIMEOUT,ER_LOCK_WAIT_TIMEOUT +ALTER TABLE t1 FORCE; + +--connection con1 +--reap +--disconnect con1 +--connection default +DROP TABLE t2, t1; diff -Nru mariadb-10.11.11/mysql-test/main/query_cache.result mariadb-10.11.13/mysql-test/main/query_cache.result --- mariadb-10.11.11/mysql-test/main/query_cache.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/query_cache.result 2025-05-19 16:14:24.000000000 +0000 @@ -2241,6 +2241,29 @@ set global Query_cache_size=18446744073709547520; SET GLOBAL query_cache_size= @qc; # +# MDEV-34075 corruption when query cache cannot allocate block +# +set global query_cache_type=1; +create table t1 (c1 smallint null, c2 binary (25) not null, c3 tinyint(4) null, c4 binary (15) not null primary key, c5 smallint not null unique key,c6 decimal(10,8) not null default 3.141592) engine=innodb; +set global query_cache_size=81920; +select * from t1 where b=1 and c=1; +ERROR 42S22: Unknown column 'b' in 'WHERE' +set session query_cache_type=1; +drop table t1; +create table t1 (c1 int not null, c2 char(5)) engine=innodb partition by linear key(c1) partitions 99; +select * from t1 where c1 <='1998-12-29 00:00:00' order by c1,c2; +c1 c2 +select group_concat(a separator '###') as names from t1 having left(names, 1)='j'; +ERROR 42S22: Unknown column 'a' in 'SELECT' +select * from t1; +c1 c2 +select count(*) from t1; +count(*) +0 +select G.a, c.a from t1 c, t1 G; +ERROR 42S22: Unknown column 'G.a' in 'SELECT' +drop table t1; +# # End of 10.5 tests # # diff -Nru mariadb-10.11.11/mysql-test/main/query_cache.test mariadb-10.11.13/mysql-test/main/query_cache.test --- mariadb-10.11.11/mysql-test/main/query_cache.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/query_cache.test 2025-05-19 16:14:24.000000000 +0000 @@ -2,6 +2,8 @@ -- source include/long_test.inc -- source include/no_valgrind_without_big.inc -- source include/no_view_protocol.inc +-- source include/have_partition.inc +-- 
source include/have_innodb.inc --disable_ps2_protocol set @save_query_cache_size=@@query_cache_size; @@ -1853,6 +1855,26 @@ --enable_warnings --echo # +--echo # MDEV-34075 corruption when query cache cannot allocate block +--echo # +set global query_cache_type=1; +create table t1 (c1 smallint null, c2 binary (25) not null, c3 tinyint(4) null, c4 binary (15) not null primary key, c5 smallint not null unique key,c6 decimal(10,8) not null default 3.141592) engine=innodb; +set global query_cache_size=81920; +--error ER_BAD_FIELD_ERROR +select * from t1 where b=1 and c=1; +set session query_cache_type=1; +drop table t1; +create table t1 (c1 int not null, c2 char(5)) engine=innodb partition by linear key(c1) partitions 99; +select * from t1 where c1 <='1998-12-29 00:00:00' order by c1,c2; +--error ER_BAD_FIELD_ERROR +select group_concat(a separator '###') as names from t1 having left(names, 1)='j'; +select * from t1; +select count(*) from t1; +--error ER_BAD_FIELD_ERROR +select G.a, c.a from t1 c, t1 G; +drop table t1; + +--echo # --echo # End of 10.5 tests --echo # diff -Nru mariadb-10.11.11/mysql-test/main/range_notembedded.result mariadb-10.11.13/mysql-test/main/range_notembedded.result --- mariadb-10.11.11/mysql-test/main/range_notembedded.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/range_notembedded.result 2025-05-19 16:14:24.000000000 +0000 @@ -247,3 +247,70 @@ id 5 DROP TABLE t1; +# +# MDEV-34620: Many index_merge variants made and discarded for a big OR +# +CREATE TABLE t1 ( +a1 int NOT NULL, +a2 int NOT NULL, +filler char(100), +KEY key1 (a1,a2), +KEY key2 (a2,a1) +); +insert into t1 (a1,a2) values (1,1),(2,2),(3,3); +set @query= concat( +"explain select * from t1 where\n", +(select +group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " ) +from seq_1_to_30) +); +set optimizer_trace=1; +prepare s from @query; +execute s; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL key1,key2 NULL NULL NULL 3 Using where +set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis'); +# Observe that "key1" is a a part of several index_merge_union: +select json_pretty(json_search(@trace, 'all', 'key1')); +json_pretty(json_search(@trace, 'all', 'key1')) +[ + "$[0].potential_range_indexes[0].index", + "$[0].analyzing_range_alternatives.range_scan_alternatives[0].index", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[0].range_scan_alternatives[0].index", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[0].index_to_merge", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[1].range_scan_alternatives[0].index", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[0].indexes_to_merge[1].index_to_merge", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[0].range_scan_alternatives[0].index", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[0].index_to_merge", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[1].range_scan_alternatives[0].index", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[1].index_to_merge", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[2].range_scan_alternatives[0].index", + 
"$[0].analyzing_range_alternatives.analyzing_index_merge_union[1].indexes_to_merge[2].index_to_merge", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[0].range_scan_alternatives[0].index", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[0].index_to_merge", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[1].range_scan_alternatives[0].index", + "$[0].analyzing_range_alternatives.analyzing_index_merge_union[2].indexes_to_merge[1].index_to_merge" +] +# +# Now, same as above but for a long IN-list +# +set @query= concat( +"explain select * from t1 where\n", +(select +group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " ) +from seq_1_to_120) +); +set optimizer_trace=1; +prepare s from @query; +execute s; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL key1,key2 NULL NULL NULL 3 Using where +set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis'); +# Observe that there are NO index_merge_union candidates. Only one potential range scan: +select json_pretty(json_search(@trace, 'all', 'key1')); +json_pretty(json_search(@trace, 'all', 'key1')) +[ + "$[0].potential_range_indexes[0].index", + "$[0].analyzing_range_alternatives.range_scan_alternatives[0].index" +] +drop table t1; diff -Nru mariadb-10.11.11/mysql-test/main/range_notembedded.test mariadb-10.11.13/mysql-test/main/range_notembedded.test --- mariadb-10.11.11/mysql-test/main/range_notembedded.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/range_notembedded.test 2025-05-19 16:14:24.000000000 +0000 @@ -162,3 +162,51 @@ SELECT id FROM t1 WHERE id IS NULL OR id NOT BETWEEN 1 AND 4; DROP TABLE t1; +--echo # +--echo # MDEV-34620: Many index_merge variants made and discarded for a big OR +--echo # + +CREATE TABLE t1 ( + a1 int NOT NULL, + a2 int NOT NULL, + filler char(100), + KEY key1 (a1,a2), + KEY key2 (a2,a1) +); +insert into t1 (a1,a2) values (1,1),(2,2),(3,3); + + +set @query= concat( + "explain select * from t1 where\n", + (select + group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " ) + from seq_1_to_30) + ); + +set optimizer_trace=1; +prepare s from @query; +execute s; +set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis'); + +--echo # Observe that "key1" is a a part of several index_merge_union: +select json_pretty(json_search(@trace, 'all', 'key1')); + +--echo # +--echo # Now, same as above but for a long IN-list +--echo # +set @query= concat( + "explain select * from t1 where\n", + (select + group_concat(concat("a1=", seq, " and a2=", seq, " ") separator "\nor " ) + from seq_1_to_120) + ); + +set optimizer_trace=1; +prepare s from @query; +execute s; +set @trace=json_extract((select trace from information_schema.optimizer_trace), '$**.range_analysis'); + +--echo # Observe that there are NO index_merge_union candidates. 
Only one potential range scan: +select json_pretty(json_search(@trace, 'all', 'key1')); +drop table t1; + diff -Nru mariadb-10.11.11/mysql-test/main/secondary_key_costs.result mariadb-10.11.13/mysql-test/main/secondary_key_costs.result --- mariadb-10.11.11/mysql-test/main/secondary_key_costs.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/secondary_key_costs.result 2025-05-19 16:14:24.000000000 +0000 @@ -177,4 +177,80 @@ drop table t1,t2; set global userstat=@save_userstat; set global innodb_stats_persistent_sample_pages=@save_ispsp; +# +# MDEV-35958: Cost estimates for materialized derived tables are poor +# +set optimizer_trace=1; +create table t1 ( +a int +); +insert into t1 select seq from seq_1_to_10000; +explain +select * +from +t1 as t1_base, +(select a from t1 limit 10000) as TBL; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000 +1 PRIMARY ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join) +2 DERIVED t1 ALL NULL NULL NULL NULL 10000 +set @trace=(select trace from information_schema.optimizer_trace); +# BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000 +select json_detailed( +json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]') +) as Trace; +Trace +[ + { + "table": "t1_base", + "table_scan": + { + "rows": 10000, + "cost": 19.08984375 + } + }, + { + "table": "", + "table_scan": + { + "rows": 10000, + "cost": 10000 + } + } +] +set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost'; +explain +select * +from +t1 as t1_base, +(select a from t1 limit 10000) as TBL; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1_base ALL NULL NULL NULL NULL 10000 +1 PRIMARY ALL NULL NULL NULL NULL 10000 Using join buffer (flat, BNL join) +2 DERIVED t1 ALL NULL NULL NULL NULL 10000 +set @trace=(select trace from information_schema.optimizer_trace); +# AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost +select json_detailed( +json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]') +) as Trace; +Trace +[ + { + "table": "t1_base", + "table_scan": + { + "rows": 10000, + "cost": 19.08984375 + } + }, + { + "table": "", + "table_scan": + { + "rows": 10000, + "cost": 501 + } + } +] +drop table t1; set @@optimizer_adjust_secondary_key_costs=default; diff -Nru mariadb-10.11.11/mysql-test/main/secondary_key_costs.test mariadb-10.11.13/mysql-test/main/secondary_key_costs.test --- mariadb-10.11.11/mysql-test/main/secondary_key_costs.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/secondary_key_costs.test 2025-05-19 16:14:24.000000000 +0000 @@ -109,4 +109,41 @@ set global userstat=@save_userstat; set global innodb_stats_persistent_sample_pages=@save_ispsp; +--echo # +--echo # MDEV-35958: Cost estimates for materialized derived tables are poor +--echo # +set optimizer_trace=1; +create table t1 ( + a int +); +insert into t1 select seq from seq_1_to_10000; + +explain +select * +from + t1 as t1_base, + (select a from t1 limit 10000) as TBL; + +set @trace=(select trace from information_schema.optimizer_trace); +--echo # BEFORE, without fix_derived_table_read_cost: derived2 has cost=rows=10000 +select json_detailed( + json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]') + ) as Trace; + +set optimizer_adjust_secondary_key_costs='fix_derived_table_read_cost'; + +explain +select * +from + t1 as t1_base, + (select a from t1 limit 10000) as TBL; + +set 
@trace=(select trace from information_schema.optimizer_trace); +--echo # AFTER, with fix_derived_table_read_cost: derived2 has more realistic cost +select json_detailed( + json_extract(json_extract(@trace, '$**.rows_estimation'), '$[1]') + ) as Trace; + +drop table t1; + set @@optimizer_adjust_secondary_key_costs=default; diff -Nru mariadb-10.11.11/mysql-test/main/skip_grants.result mariadb-10.11.13/mysql-test/main/skip_grants.result --- mariadb-10.11.11/mysql-test/main/skip_grants.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/skip_grants.result 2025-05-19 16:14:24.000000000 +0000 @@ -138,6 +138,14 @@ # End of 10.3 tests # # +# MDEV-34501: SIGSEGV in pfs_start_mutex_wait_v1, __strlen_avx2, or __strlen_evex from safe_mutex_lock on CREATE DEFINER when using skip-grant-tables +# +CREATE DEFINER=a PROCEDURE p() SELECT 1; +CREATE DEFINER=a FUNCTION f() RETURNS INT RETURN 100; +DROP PROCEDURE p; +DROP FUNCTION f; +# End of 10.5 tests +# # MDEV-24815 Show "--skip-grant-tables" state in SYSTEM VARIABLES # SELECT @@skip_grant_tables AS EXPECT_1; diff -Nru mariadb-10.11.11/mysql-test/main/skip_grants.test mariadb-10.11.13/mysql-test/main/skip_grants.test --- mariadb-10.11.11/mysql-test/main/skip_grants.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/skip_grants.test 2025-05-19 16:14:24.000000000 +0000 @@ -170,6 +170,17 @@ --echo # --echo # +--echo # MDEV-34501: SIGSEGV in pfs_start_mutex_wait_v1, __strlen_avx2, or __strlen_evex from safe_mutex_lock on CREATE DEFINER when using skip-grant-tables +--echo # +CREATE DEFINER=a PROCEDURE p() SELECT 1; +CREATE DEFINER=a FUNCTION f() RETURNS INT RETURN 100; + +DROP PROCEDURE p; +DROP FUNCTION f; + +--echo # End of 10.5 tests + +--echo # --echo # MDEV-24815 Show "--skip-grant-tables" state in SYSTEM VARIABLES --echo # diff -Nru mariadb-10.11.11/mysql-test/main/sp-bugs.result mariadb-10.11.13/mysql-test/main/sp-bugs.result --- mariadb-10.11.11/mysql-test/main/sp-bugs.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/sp-bugs.result 2025-05-19 16:14:24.000000000 +0000 @@ -388,5 +388,14 @@ DROP PROCEDURE p2; DROP TABLE t1, t2; # +# MDEV-34501: SIGSEGV in pfs_start_mutex_wait_v1, __strlen_avx2, or __strlen_evex from safe_mutex_lock on CREATE DEFINER when using skip-grant-tables +# +# This test is a duplicate of the one located in the file skip_grants.test +# and placed here to check the same test case against embedded-server +CREATE DEFINER=a PROCEDURE p() SELECT 1; +CREATE DEFINER=a FUNCTION f() RETURNS INT RETURN 100; +DROP PROCEDURE p; +DROP FUNCTION f; +# # End of 10.5 tests # diff -Nru mariadb-10.11.11/mysql-test/main/sp-bugs.test mariadb-10.11.13/mysql-test/main/sp-bugs.test --- mariadb-10.11.11/mysql-test/main/sp-bugs.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/sp-bugs.test 2025-05-19 16:14:24.000000000 +0000 @@ -415,5 +415,25 @@ DROP TABLE t1, t2; --echo # +--echo # MDEV-34501: SIGSEGV in pfs_start_mutex_wait_v1, __strlen_avx2, or __strlen_evex from safe_mutex_lock on CREATE DEFINER when using skip-grant-tables +--echo # +--echo # This test is a duplicate of the one located in the file skip_grants.test +--echo # and placed here to check the same test case against embedded-server + +# Disable warnings before running the following CREATE PROCEDURE/FUNCTION +# statement since the warning message +# "The user specified as a definer ('a'@'%') does not exist" +# is output in case the test be run against a regular server +# and isn't 
output if embedded server is used (@sa sp_process_definer() +# in sql_parse.cc). +--disable_warnings +CREATE DEFINER=a PROCEDURE p() SELECT 1; +CREATE DEFINER=a FUNCTION f() RETURNS INT RETURN 100; +--enable_warnings + +DROP PROCEDURE p; +DROP FUNCTION f; + +--echo # --echo # End of 10.5 tests --echo # diff -Nru mariadb-10.11.11/mysql-test/main/sp-row.result mariadb-10.11.13/mysql-test/main/sp-row.result --- mariadb-10.11.11/mysql-test/main/sp-row.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/sp-row.result 2025-05-19 16:14:24.000000000 +0000 @@ -2313,3 +2313,44 @@ END; $$ ERROR 21000: Operand should contain 1 column(s) +# Start of 10.6 tests +# +# MDEV-36179 Assertion `0' failed in virtual bool Type_handler_row::Item_save_in_value(THD*, Item*, st_value*) const +# +CREATE PROCEDURE p0 (IN a ROW(a INT,b INT)) +BEGIN +SET a=ROW(0,0); +END; +/ +PREPARE s0 FROM 'CALL p0(?)'; +EXECUTE s0 USING @a; +ERROR HY000: Illegal parameter data type row for operation 'EXECUTE ... USING ?' +DROP PROCEDURE p0; +CREATE PROCEDURE p0 (INOUT a ROW(a INT,b INT)) +BEGIN +SET a=ROW(0,0); +END; +/ +PREPARE s0 FROM 'CALL p0(?)'; +EXECUTE s0 USING @a; +ERROR HY000: Illegal parameter data type row for operation 'EXECUTE ... USING ?' +DROP PROCEDURE p0; +CREATE PROCEDURE p0 (OUT a ROW(a INT,b INT)) +BEGIN +SET a=ROW(0,0); +END; +/ +PREPARE s0 FROM 'CALL p0(?)'; +EXECUTE s0 USING @a; +ERROR HY000: Illegal parameter data type row for operation 'EXECUTE ... USING ?' +DROP PROCEDURE p0; +CREATE FUNCTION f0(a ROW(a INT,b INT)) RETURNS BOOLEAN +BEGIN +RETURN FALSE; +END; +/ +PREPARE s0 FROM 'SELECT f0(?)'; +EXECUTE s0 USING @a; +ERROR HY000: Illegal parameter data type row for operation 'EXECUTE ... USING ?' +DROP FUNCTION f0; +# End of 10.6 tests diff -Nru mariadb-10.11.11/mysql-test/main/sp-row.test mariadb-10.11.13/mysql-test/main/sp-row.test --- mariadb-10.11.11/mysql-test/main/sp-row.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/sp-row.test 2025-05-19 16:14:24.000000000 +0000 @@ -1544,3 +1544,64 @@ END; $$ DELIMITER ;$$ + + +--echo # Start of 10.6 tests + + +--echo # +--echo # MDEV-36179 Assertion `0' failed in virtual bool Type_handler_row::Item_save_in_value(THD*, Item*, st_value*) const +--echo # + +DELIMITER /; +CREATE PROCEDURE p0 (IN a ROW(a INT,b INT)) +BEGIN + SET a=ROW(0,0); +END; +/ +DELIMITER ;/ +PREPARE s0 FROM 'CALL p0(?)'; +--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION +EXECUTE s0 USING @a; +DROP PROCEDURE p0; + + +DELIMITER /; +CREATE PROCEDURE p0 (INOUT a ROW(a INT,b INT)) +BEGIN + SET a=ROW(0,0); +END; +/ +DELIMITER ;/ +PREPARE s0 FROM 'CALL p0(?)'; +--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION +EXECUTE s0 USING @a; +DROP PROCEDURE p0; + + +DELIMITER /; +CREATE PROCEDURE p0 (OUT a ROW(a INT,b INT)) +BEGIN + SET a=ROW(0,0); +END; +/ +DELIMITER ;/ +PREPARE s0 FROM 'CALL p0(?)'; +--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION +EXECUTE s0 USING @a; +DROP PROCEDURE p0; + + +DELIMITER /; +CREATE FUNCTION f0(a ROW(a INT,b INT)) RETURNS BOOLEAN +BEGIN + RETURN FALSE; +END; +/ +DELIMITER ;/ +PREPARE s0 FROM 'SELECT f0(?)'; +--error ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION +EXECUTE s0 USING @a; +DROP FUNCTION f0; + +--echo # End of 10.6 tests diff -Nru mariadb-10.11.11/mysql-test/main/subselect.result mariadb-10.11.13/mysql-test/main/subselect.result --- mariadb-10.11.11/mysql-test/main/subselect.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect.result 2025-05-19 16:14:24.000000000 
+0000 @@ -679,22 +679,24 @@ insert into t2 values (1); insert into t3 values (1),(2); INSERT INTO t1 (x) VALUES ((SELECT x FROM t1)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data INSERT INTO t1 (x) VALUES ((SELECT b FROM t3)); ERROR 21000: Subquery returns more than 1 row INSERT INTO t1 (x) VALUES ((SELECT a FROM t2)); select * from t1; x +NULL 1 insert into t2 values (1); INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 INSERT INTO t1 (x) select (SELECT SUM(a)+1 FROM t2) FROM t2; select * from t1; x +NULL 1 2 3 @@ -702,6 +704,7 @@ INSERT INTO t1 (x) select (SELECT SUM(x)+2 FROM t1) FROM t2; select * from t1; x +NULL 1 2 3 @@ -711,6 +714,7 @@ INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 3 @@ -727,7 +731,7 @@ select * from t1; x y replace into t1 (x, y) VALUES ((SELECT x FROM t1), (SELECT a+1 FROM t2)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 23000: Column 'x' cannot be null replace into t1 (x, y) VALUES ((SELECT a FROM t3), (SELECT a+1 FROM t2)); ERROR 21000: Subquery returns more than 1 row replace into t1 (x, y) VALUES ((SELECT a FROM t2), (SELECT a+1 FROM t2)); @@ -795,13 +799,21 @@ id 2 INSERT INTO t2 VALUES ((SELECT * FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row INSERT INTO t2 VALUES ((SELECT id FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row +select * from t2; +id +1 +2 +INSERT INTO t2 VALUES ((SELECT count(*) FROM t2)); +INSERT INTO t2 VALUES ((SELECT max(id) FROM t2)); SELECT * FROM t2; id 1 2 +2 +2 CREATE TABLE t1 (id int(11) default NULL, KEY id (id)) ENGINE=MyISAM CHARSET=latin1; INSERT INTO t1 values (1),(1); UPDATE t2 SET id=(SELECT * FROM t1); diff -Nru mariadb-10.11.11/mysql-test/main/subselect.test mariadb-10.11.13/mysql-test/main/subselect.test --- mariadb-10.11.11/mysql-test/main/subselect.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect.test 2025-05-19 16:14:24.000000000 +0000 @@ -419,7 +419,6 @@ create table t3 (b int); insert into t2 values (1); insert into t3 values (1),(2); --- error ER_UPDATE_TABLE_USED INSERT INTO t1 (x) VALUES ((SELECT x FROM t1)); -- error ER_SUBQUERY_NO_1_ROW INSERT INTO t1 (x) VALUES ((SELECT b FROM t3)); @@ -454,7 +453,7 @@ insert into t2 values (1); insert into t3 values (1),(2); select * from t1; --- error ER_UPDATE_TABLE_USED +-- error ER_BAD_NULL_ERROR replace into t1 (x, y) VALUES ((SELECT x FROM t1), (SELECT a+1 FROM t2)); -- error ER_SUBQUERY_NO_1_ROW replace into t1 (x, y) VALUES ((SELECT a FROM t3), (SELECT a+1 FROM t2)); @@ -494,10 +493,13 @@ --disable_prepare_warnings SELECT * FROM t2 WHERE id IN (SELECT 5 UNION SELECT 3); SELECT * FROM t2 WHERE id IN (SELECT 5 UNION SELECT 2); --- error ER_UPDATE_TABLE_USED +-- error ER_SUBQUERY_NO_1_ROW INSERT INTO t2 VALUES ((SELECT * FROM t2)); --- error ER_UPDATE_TABLE_USED +-- error ER_SUBQUERY_NO_1_ROW INSERT INTO t2 VALUES ((SELECT id FROM t2)); +select * from t2; +INSERT INTO t2 VALUES ((SELECT count(*) FROM t2)); +INSERT INTO t2 VALUES ((SELECT max(id) FROM t2)); SELECT * FROM t2; CREATE TABLE t1 (id int(11) default NULL, KEY id (id)) ENGINE=MyISAM CHARSET=latin1; INSERT INTO t1 values 
(1),(1); diff -Nru mariadb-10.11.11/mysql-test/main/subselect_elimination.result mariadb-10.11.13/mysql-test/main/subselect_elimination.result --- mariadb-10.11.11/mysql-test/main/subselect_elimination.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect_elimination.result 2025-05-19 16:14:24.000000000 +0000 @@ -136,12 +136,22 @@ # access within null pointer CREATE TABLE x (x INT) ENGINE=InnoDB; INSERT INTO x (x) VALUES (0); +select NULL IN (SELECT (SELECT x FROM (SELECT x FROM +(SELECT 0 IN (SELECT x=0 FROM (SELECT x FROM (SELECT (SELECT (SELECT (SELECT +(SELECT 0 AS x) FROM x AS x) IN (SELECT 0 AS x) AS x) FROM x AS x) IN +(SELECT x WHERE x=0) AS x FROM x AS x) AS x) AS x GROUP BY x) AS x FROM x) AS x) +AS x) IN (SELECT 0 AS x) AS x FROM x) as exp; +exp +NULL INSERT INTO x (x) VALUES (x IN (SELECT (SELECT x FROM (SELECT x FROM (SELECT 0 IN (SELECT x=0 FROM (SELECT x FROM (SELECT (SELECT (SELECT (SELECT (SELECT 0 AS x) FROM x AS x) IN (SELECT 0 AS x) AS x) FROM x AS x) IN (SELECT x WHERE x=0) AS x FROM x AS x) AS x) AS x GROUP BY x) AS x FROM x) AS x) AS x) IN (SELECT 0 AS x) AS x FROM x)); -ERROR HY000: Table 'x' is specified twice, both as a target for 'INSERT' and as a separate source for data +select * from x; +x +0 +NULL DROP TABLE x; # MDEV-28622: Item_subselect eliminated flag set but Item still # evaluated/used. diff -Nru mariadb-10.11.11/mysql-test/main/subselect_elimination.test mariadb-10.11.13/mysql-test/main/subselect_elimination.test --- mariadb-10.11.11/mysql-test/main/subselect_elimination.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect_elimination.test 2025-05-19 16:14:24.000000000 +0000 @@ -133,12 +133,17 @@ CREATE TABLE x (x INT) ENGINE=InnoDB; INSERT INTO x (x) VALUES (0); ---error ER_UPDATE_TABLE_USED +select NULL IN (SELECT (SELECT x FROM (SELECT x FROM +(SELECT 0 IN (SELECT x=0 FROM (SELECT x FROM (SELECT (SELECT (SELECT (SELECT +(SELECT 0 AS x) FROM x AS x) IN (SELECT 0 AS x) AS x) FROM x AS x) IN +(SELECT x WHERE x=0) AS x FROM x AS x) AS x) AS x GROUP BY x) AS x FROM x) AS x) +AS x) IN (SELECT 0 AS x) AS x FROM x) as exp; INSERT INTO x (x) VALUES (x IN (SELECT (SELECT x FROM (SELECT x FROM (SELECT 0 IN (SELECT x=0 FROM (SELECT x FROM (SELECT (SELECT (SELECT (SELECT (SELECT 0 AS x) FROM x AS x) IN (SELECT 0 AS x) AS x) FROM x AS x) IN (SELECT x WHERE x=0) AS x FROM x AS x) AS x) AS x GROUP BY x) AS x FROM x) AS x) AS x) IN (SELECT 0 AS x) AS x FROM x)); +select * from x; DROP TABLE x; --echo # MDEV-28622: Item_subselect eliminated flag set but Item still diff -Nru mariadb-10.11.11/mysql-test/main/subselect_no_exists_to_in.result mariadb-10.11.13/mysql-test/main/subselect_no_exists_to_in.result --- mariadb-10.11.11/mysql-test/main/subselect_no_exists_to_in.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect_no_exists_to_in.result 2025-05-19 16:14:24.000000000 +0000 @@ -683,22 +683,24 @@ insert into t2 values (1); insert into t3 values (1),(2); INSERT INTO t1 (x) VALUES ((SELECT x FROM t1)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data INSERT INTO t1 (x) VALUES ((SELECT b FROM t3)); ERROR 21000: Subquery returns more than 1 row INSERT INTO t1 (x) VALUES ((SELECT a FROM t2)); select * from t1; x +NULL 1 insert into t2 values (1); INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 INSERT INTO t1 (x) select (SELECT SUM(a)+1 FROM t2) FROM t2; select * 
from t1; x +NULL 1 2 3 @@ -706,6 +708,7 @@ INSERT INTO t1 (x) select (SELECT SUM(x)+2 FROM t1) FROM t2; select * from t1; x +NULL 1 2 3 @@ -715,6 +718,7 @@ INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 3 @@ -731,7 +735,7 @@ select * from t1; x y replace into t1 (x, y) VALUES ((SELECT x FROM t1), (SELECT a+1 FROM t2)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 23000: Column 'x' cannot be null replace into t1 (x, y) VALUES ((SELECT a FROM t3), (SELECT a+1 FROM t2)); ERROR 21000: Subquery returns more than 1 row replace into t1 (x, y) VALUES ((SELECT a FROM t2), (SELECT a+1 FROM t2)); @@ -799,13 +803,21 @@ id 2 INSERT INTO t2 VALUES ((SELECT * FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row INSERT INTO t2 VALUES ((SELECT id FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row +select * from t2; +id +1 +2 +INSERT INTO t2 VALUES ((SELECT count(*) FROM t2)); +INSERT INTO t2 VALUES ((SELECT max(id) FROM t2)); SELECT * FROM t2; id 1 2 +2 +2 CREATE TABLE t1 (id int(11) default NULL, KEY id (id)) ENGINE=MyISAM CHARSET=latin1; INSERT INTO t1 values (1),(1); UPDATE t2 SET id=(SELECT * FROM t1); diff -Nru mariadb-10.11.11/mysql-test/main/subselect_no_mat.result mariadb-10.11.13/mysql-test/main/subselect_no_mat.result --- mariadb-10.11.11/mysql-test/main/subselect_no_mat.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect_no_mat.result 2025-05-19 16:14:24.000000000 +0000 @@ -686,22 +686,24 @@ insert into t2 values (1); insert into t3 values (1),(2); INSERT INTO t1 (x) VALUES ((SELECT x FROM t1)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data INSERT INTO t1 (x) VALUES ((SELECT b FROM t3)); ERROR 21000: Subquery returns more than 1 row INSERT INTO t1 (x) VALUES ((SELECT a FROM t2)); select * from t1; x +NULL 1 insert into t2 values (1); INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 INSERT INTO t1 (x) select (SELECT SUM(a)+1 FROM t2) FROM t2; select * from t1; x +NULL 1 2 3 @@ -709,6 +711,7 @@ INSERT INTO t1 (x) select (SELECT SUM(x)+2 FROM t1) FROM t2; select * from t1; x +NULL 1 2 3 @@ -718,6 +721,7 @@ INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 3 @@ -734,7 +738,7 @@ select * from t1; x y replace into t1 (x, y) VALUES ((SELECT x FROM t1), (SELECT a+1 FROM t2)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 23000: Column 'x' cannot be null replace into t1 (x, y) VALUES ((SELECT a FROM t3), (SELECT a+1 FROM t2)); ERROR 21000: Subquery returns more than 1 row replace into t1 (x, y) VALUES ((SELECT a FROM t2), (SELECT a+1 FROM t2)); @@ -802,13 +806,21 @@ id 2 INSERT INTO t2 VALUES ((SELECT * FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row INSERT INTO t2 VALUES ((SELECT id FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row +select * from t2; +id +1 +2 +INSERT INTO t2 VALUES 
((SELECT count(*) FROM t2)); +INSERT INTO t2 VALUES ((SELECT max(id) FROM t2)); SELECT * FROM t2; id 1 2 +2 +2 CREATE TABLE t1 (id int(11) default NULL, KEY id (id)) ENGINE=MyISAM CHARSET=latin1; INSERT INTO t1 values (1),(1); UPDATE t2 SET id=(SELECT * FROM t1); diff -Nru mariadb-10.11.11/mysql-test/main/subselect_no_opts.result mariadb-10.11.13/mysql-test/main/subselect_no_opts.result --- mariadb-10.11.11/mysql-test/main/subselect_no_opts.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect_no_opts.result 2025-05-19 16:14:24.000000000 +0000 @@ -682,22 +682,24 @@ insert into t2 values (1); insert into t3 values (1),(2); INSERT INTO t1 (x) VALUES ((SELECT x FROM t1)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data INSERT INTO t1 (x) VALUES ((SELECT b FROM t3)); ERROR 21000: Subquery returns more than 1 row INSERT INTO t1 (x) VALUES ((SELECT a FROM t2)); select * from t1; x +NULL 1 insert into t2 values (1); INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 INSERT INTO t1 (x) select (SELECT SUM(a)+1 FROM t2) FROM t2; select * from t1; x +NULL 1 2 3 @@ -705,6 +707,7 @@ INSERT INTO t1 (x) select (SELECT SUM(x)+2 FROM t1) FROM t2; select * from t1; x +NULL 1 2 3 @@ -714,6 +717,7 @@ INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 3 @@ -730,7 +734,7 @@ select * from t1; x y replace into t1 (x, y) VALUES ((SELECT x FROM t1), (SELECT a+1 FROM t2)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 23000: Column 'x' cannot be null replace into t1 (x, y) VALUES ((SELECT a FROM t3), (SELECT a+1 FROM t2)); ERROR 21000: Subquery returns more than 1 row replace into t1 (x, y) VALUES ((SELECT a FROM t2), (SELECT a+1 FROM t2)); @@ -798,13 +802,21 @@ id 2 INSERT INTO t2 VALUES ((SELECT * FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row INSERT INTO t2 VALUES ((SELECT id FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row +select * from t2; +id +1 +2 +INSERT INTO t2 VALUES ((SELECT count(*) FROM t2)); +INSERT INTO t2 VALUES ((SELECT max(id) FROM t2)); SELECT * FROM t2; id 1 2 +2 +2 CREATE TABLE t1 (id int(11) default NULL, KEY id (id)) ENGINE=MyISAM CHARSET=latin1; INSERT INTO t1 values (1),(1); UPDATE t2 SET id=(SELECT * FROM t1); diff -Nru mariadb-10.11.11/mysql-test/main/subselect_no_scache.result mariadb-10.11.13/mysql-test/main/subselect_no_scache.result --- mariadb-10.11.11/mysql-test/main/subselect_no_scache.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect_no_scache.result 2025-05-19 16:14:24.000000000 +0000 @@ -685,22 +685,24 @@ insert into t2 values (1); insert into t3 values (1),(2); INSERT INTO t1 (x) VALUES ((SELECT x FROM t1)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data INSERT INTO t1 (x) VALUES ((SELECT b FROM t3)); ERROR 21000: Subquery returns more than 1 row INSERT INTO t1 (x) VALUES ((SELECT a FROM t2)); select * from t1; x +NULL 1 insert into t2 values (1); INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 INSERT INTO t1 (x) select (SELECT SUM(a)+1 FROM t2) FROM t2; 
select * from t1; x +NULL 1 2 3 @@ -708,6 +710,7 @@ INSERT INTO t1 (x) select (SELECT SUM(x)+2 FROM t1) FROM t2; select * from t1; x +NULL 1 2 3 @@ -717,6 +720,7 @@ INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 3 @@ -733,7 +737,7 @@ select * from t1; x y replace into t1 (x, y) VALUES ((SELECT x FROM t1), (SELECT a+1 FROM t2)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 23000: Column 'x' cannot be null replace into t1 (x, y) VALUES ((SELECT a FROM t3), (SELECT a+1 FROM t2)); ERROR 21000: Subquery returns more than 1 row replace into t1 (x, y) VALUES ((SELECT a FROM t2), (SELECT a+1 FROM t2)); @@ -801,13 +805,21 @@ id 2 INSERT INTO t2 VALUES ((SELECT * FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row INSERT INTO t2 VALUES ((SELECT id FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row +select * from t2; +id +1 +2 +INSERT INTO t2 VALUES ((SELECT count(*) FROM t2)); +INSERT INTO t2 VALUES ((SELECT max(id) FROM t2)); SELECT * FROM t2; id 1 2 +2 +2 CREATE TABLE t1 (id int(11) default NULL, KEY id (id)) ENGINE=MyISAM CHARSET=latin1; INSERT INTO t1 values (1),(1); UPDATE t2 SET id=(SELECT * FROM t1); diff -Nru mariadb-10.11.11/mysql-test/main/subselect_no_semijoin.result mariadb-10.11.13/mysql-test/main/subselect_no_semijoin.result --- mariadb-10.11.11/mysql-test/main/subselect_no_semijoin.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/subselect_no_semijoin.result 2025-05-19 16:14:24.000000000 +0000 @@ -682,22 +682,24 @@ insert into t2 values (1); insert into t3 values (1),(2); INSERT INTO t1 (x) VALUES ((SELECT x FROM t1)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data INSERT INTO t1 (x) VALUES ((SELECT b FROM t3)); ERROR 21000: Subquery returns more than 1 row INSERT INTO t1 (x) VALUES ((SELECT a FROM t2)); select * from t1; x +NULL 1 insert into t2 values (1); INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 INSERT INTO t1 (x) select (SELECT SUM(a)+1 FROM t2) FROM t2; select * from t1; x +NULL 1 2 3 @@ -705,6 +707,7 @@ INSERT INTO t1 (x) select (SELECT SUM(x)+2 FROM t1) FROM t2; select * from t1; x +NULL 1 2 3 @@ -714,6 +717,7 @@ INSERT DELAYED INTO t1 (x) VALUES ((SELECT SUM(a) FROM t2)); select * from t1; x +NULL 1 2 3 @@ -730,7 +734,7 @@ select * from t1; x y replace into t1 (x, y) VALUES ((SELECT x FROM t1), (SELECT a+1 FROM t2)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 23000: Column 'x' cannot be null replace into t1 (x, y) VALUES ((SELECT a FROM t3), (SELECT a+1 FROM t2)); ERROR 21000: Subquery returns more than 1 row replace into t1 (x, y) VALUES ((SELECT a FROM t2), (SELECT a+1 FROM t2)); @@ -798,13 +802,21 @@ id 2 INSERT INTO t2 VALUES ((SELECT * FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row INSERT INTO t2 VALUES ((SELECT id FROM t2)); -ERROR HY000: Table 't2' is specified twice, both as a target for 'INSERT' and as a separate source for data +ERROR 21000: Subquery returns more than 1 row +select * from t2; +id 
+1 +2 +INSERT INTO t2 VALUES ((SELECT count(*) FROM t2)); +INSERT INTO t2 VALUES ((SELECT max(id) FROM t2)); SELECT * FROM t2; id 1 2 +2 +2 CREATE TABLE t1 (id int(11) default NULL, KEY id (id)) ENGINE=MyISAM CHARSET=latin1; INSERT INTO t1 values (1),(1); UPDATE t2 SET id=(SELECT * FROM t1); diff -Nru mariadb-10.11.11/mysql-test/main/temp_table_frm.result mariadb-10.11.13/mysql-test/main/temp_table_frm.result --- mariadb-10.11.11/mysql-test/main/temp_table_frm.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/temp_table_frm.result 2025-05-19 16:14:24.000000000 +0000 @@ -25,3 +25,9 @@ set @@use_stat_tables= @save_use_stat_tables; set @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; drop table t1; +# +# MDEV-36138 Server null-pointer crash at startup when tmptables left in --tmpdir +# +create table t1 (c int); +drop table t1; +# restart diff -Nru mariadb-10.11.11/mysql-test/main/temp_table_frm.test mariadb-10.11.13/mysql-test/main/temp_table_frm.test --- mariadb-10.11.11/mysql-test/main/temp_table_frm.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/temp_table_frm.test 2025-05-19 16:14:24.000000000 +0000 @@ -24,4 +24,15 @@ from information_schema.session_status join t1 using (variable_name); set @@use_stat_tables= @save_use_stat_tables; set @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; -drop table t1; \ No newline at end of file +drop table t1; + +--echo # +--echo # MDEV-36138 Server null-pointer crash at startup when tmptables left in --tmpdir +--echo # + +create table t1 (c int); +let $MYSQLD_TMPDIR=`SELECT @@tmpdir`; +let $MYSQLD_DATADIR=`SELECT @@datadir`; +--copy_file $MYSQLD_DATADIR/test/t1.frm $MYSQLD_TMPDIR/#sqlt1.frm +drop table t1; +--source include/restart_mysqld.inc diff -Nru mariadb-10.11.11/mysql-test/main/timezone.test mariadb-10.11.13/mysql-test/main/timezone.test --- mariadb-10.11.11/mysql-test/main/timezone.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/timezone.test 2025-05-19 16:14:24.000000000 +0000 @@ -8,7 +8,7 @@ enable_query_log; # The following is because of daylight saving time ---replace_result MEST CET MET CET +--replace_result MEST CET MET CET CEST CET show variables like "system_time_zone"; --echo # diff -Nru mariadb-10.11.11/mysql-test/main/trigger_null.result mariadb-10.11.13/mysql-test/main/trigger_null.result --- mariadb-10.11.11/mysql-test/main/trigger_null.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/trigger_null.result 2025-05-19 16:14:24.000000000 +0000 @@ -399,4 +399,19 @@ Warning 1364 Field 'c5' doesn't have a default value drop table t1; set sql_mode=default; +# +# MDEV-36026 Problem with INSERT SELECT on NOT NULL columns while having BEFORE UPDATE trigger +# +create table t1 (b int(11) not null); +create trigger t1bu before update on t1 for each row begin end; +insert t1 (b) select 1 union select 2; +create trigger trgi before insert on t1 for each row set new.b=ifnull(new.b,10); +insert t1 (b) select NULL union select 11; +select * from t1; +b +1 +2 +10 +11 +drop table t1; # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/trigger_null.test mariadb-10.11.13/mysql-test/main/trigger_null.test --- mariadb-10.11.11/mysql-test/main/trigger_null.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/trigger_null.test 2025-05-19 16:14:24.000000000 +0000 @@ -425,4 +425,15 @@ drop table t1; set sql_mode=default; +--echo # +--echo # 
MDEV-36026 Problem with INSERT SELECT on NOT NULL columns while having BEFORE UPDATE trigger +--echo # +create table t1 (b int(11) not null); +create trigger t1bu before update on t1 for each row begin end; +insert t1 (b) select 1 union select 2; +create trigger trgi before insert on t1 for each row set new.b=ifnull(new.b,10); +insert t1 (b) select NULL union select 11; +select * from t1; +drop table t1; + --echo # End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/main/type_binary.result mariadb-10.11.13/mysql-test/main/type_binary.result --- mariadb-10.11.11/mysql-test/main/type_binary.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/type_binary.result 2025-05-19 16:14:24.000000000 +0000 @@ -397,3 +397,61 @@ DROP TABLE t2; DROP TABLE t1; SET note_verbosity=DEFAULT; +# +# MDEV-36235 Incorrect result for BETWEEN over unique blob prefix +# +CREATE TABLE t1 (c1 BINARY(16), UNIQUE (c1)); +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2); +SELECT HEX(c1) FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +HEX(c1) +31000000000000000000000000000000 +32000000000000000000000000000000 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '-1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '-2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +SELECT HEX(c1) FROM t1 IGNORE KEY(c1) WHERE 'a' BETWEEN 0 AND (c1); +HEX(c1) +31000000000000000000000000000000 +32000000000000000000000000000000 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '-2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '-1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: '2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +SELECT HEX(c1) FROM t1 WHERE '#' BETWEEN c1 AND 0; +HEX(c1) +2D310000000000000000000000000000 +2D320000000000000000000000000000 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '-1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '-2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +SELECT HEX(c1) FROM t1 IGNORE KEY(c1) WHERE '#' BETWEEN c1 AND 0; +HEX(c1) +2D320000000000000000000000000000 +2D310000000000000000000000000000 +Warnings: +Warning 1292 
Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '-2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '-1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/type_binary.test mariadb-10.11.13/mysql-test/main/type_binary.test --- mariadb-10.11.11/mysql-test/main/type_binary.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/type_binary.test 2025-05-19 16:14:24.000000000 +0000 @@ -178,3 +178,14 @@ --source unusable_keys_joins.inc DROP TABLE t1; SET note_verbosity=DEFAULT; + +--echo # +--echo # MDEV-36235 Incorrect result for BETWEEN over unique blob prefix +--echo # +CREATE TABLE t1 (c1 BINARY(16), UNIQUE (c1)); +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2); +SELECT HEX(c1) FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT HEX(c1) FROM t1 IGNORE KEY(c1) WHERE 'a' BETWEEN 0 AND (c1); +SELECT HEX(c1) FROM t1 WHERE '#' BETWEEN c1 AND 0; +SELECT HEX(c1) FROM t1 IGNORE KEY(c1) WHERE '#' BETWEEN c1 AND 0; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/type_blob.result mariadb-10.11.13/mysql-test/main/type_blob.result --- mariadb-10.11.11/mysql-test/main/type_blob.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/type_blob.result 2025-05-19 16:14:24.000000000 +0000 @@ -1419,3 +1419,193 @@ DROP TABLE t2; DROP TABLE t1; SET note_verbosity=DEFAULT; +# +# MDEV-36235 Incorrect result for BETWEEN over unique blob prefix +# +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))) engine=myisam; +INSERT INTO t1 (c1) VALUES (1); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +DROP TABLE t1; +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))); +INSERT INTO t1 (c1) VALUES (1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +3 +4 +5 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 WHERE 3 BETWEEN 10*POW(-1,c1) AND (c1); +c1 +3 +5 +SELECT c1 FROM t1 WHERE 'a' BETWEEN 10*POW(-1,c1) AND (c1); +c1 +1 +3 +5 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +DROP TABLE t1; +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))) engine=myisam; +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +3 +4 +5 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' 
+Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +c1 +-2 +-1 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +DROP TABLE t1; +CREATE TABLE t1 (c1 TINYBLOB NOT NULL); +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +3 +4 +5 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +c1 +-2 +-1 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +DROP TABLE t1; +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))) engine=innodb; +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +3 +4 +5 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +c1 +-2 +-1 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +ALTER TABLE t1 engine=myisam; +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +3 +4 +5 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +c1 +-2 +-1 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +DROP TABLE t1; +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1)) engine=innodb; +INSERT INTO t1 (c1) VALUES 
(-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +3 +4 +5 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +c1 +-2 +-1 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/type_blob.test mariadb-10.11.13/mysql-test/main/type_blob.test --- mariadb-10.11.11/mysql-test/main/type_blob.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/type_blob.test 2025-05-19 16:14:24.000000000 +0000 @@ -808,3 +808,48 @@ --source unusable_keys_joins.inc DROP TABLE t1; SET note_verbosity=DEFAULT; + +--echo # +--echo # MDEV-36235 Incorrect result for BETWEEN over unique blob prefix +--echo # +# myisam has a special optimization for tables with one row +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))) engine=myisam; +INSERT INTO t1 (c1) VALUES (1); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +DROP TABLE t1; + +# This case shows that we don't transform the entire WHERE clause +# into a range condition. +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))); +INSERT INTO t1 (c1) VALUES (1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 WHERE 3 BETWEEN 10*POW(-1,c1) AND (c1); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 10*POW(-1,c1) AND (c1); +DROP TABLE t1; + +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))) engine=myisam; +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +DROP TABLE t1; + +CREATE TABLE t1 (c1 TINYBLOB NOT NULL); +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +DROP TABLE t1; + +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1(2))) engine=innodb; +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +ALTER TABLE t1 engine=myisam; +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +DROP TABLE t1; + +CREATE TABLE t1 (c1 TINYBLOB, UNIQUE (c1)) engine=innodb; +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2),(3),(4),(5); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/type_num_innodb.result mariadb-10.11.13/mysql-test/main/type_num_innodb.result --- mariadb-10.11.11/mysql-test/main/type_num_innodb.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/type_num_innodb.result 2025-05-19 16:14:24.000000000 +0000 @@ -46,23 +46,70 @@ SELECT * FROM t1,t2 WHERE a=d; a b c pk d e Warnings: -Warning 1292 Truncated incorrect DECIMAL value: 'd' -Warning 1292 Truncated incorrect DECIMAL value: 'd' -Warning 
1292 Truncated incorrect DECIMAL value: 'f' -Warning 1292 Truncated incorrect DECIMAL value: 'f' -Warning 1292 Truncated incorrect DECIMAL value: 'g' -Warning 1292 Truncated incorrect DECIMAL value: 'k' -Warning 1292 Truncated incorrect DECIMAL value: 'm' -Warning 1292 Truncated incorrect DECIMAL value: 'm' -Warning 1292 Truncated incorrect DECIMAL value: 'm' -Warning 1292 Truncated incorrect DECIMAL value: 'o' -Warning 1292 Truncated incorrect DECIMAL value: 'q' -Warning 1292 Truncated incorrect DECIMAL value: 'r' -Warning 1292 Truncated incorrect DECIMAL value: 'u' -Warning 1292 Truncated incorrect DECIMAL value: 'w' -Warning 1292 Truncated incorrect DECIMAL value: 'x' -Warning 1292 Truncated incorrect DECIMAL value: 'x' -Warning 1292 Truncated incorrect DECIMAL value: 'y' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'g' +Warning 1292 Truncated incorrect DOUBLE value: 'k' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'o' +Warning 1292 Truncated incorrect DOUBLE value: 'q' +Warning 1292 Truncated incorrect DOUBLE value: 'r' +Warning 1292 Truncated incorrect DOUBLE value: 'u' +Warning 1292 Truncated incorrect DOUBLE value: 'w' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'y' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'g' +Warning 1292 Truncated incorrect DOUBLE value: 'k' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'o' +Warning 1292 Truncated incorrect DOUBLE value: 'q' +Warning 1292 Truncated incorrect DOUBLE value: 'r' +Warning 1292 Truncated incorrect DOUBLE value: 'u' +Warning 1292 Truncated incorrect DOUBLE value: 'w' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'y' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'g' +Warning 1292 Truncated incorrect DOUBLE value: 'k' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'o' +Warning 1292 Truncated incorrect DOUBLE value: 'q' +Warning 1292 Truncated incorrect DOUBLE value: 'r' +Warning 1292 Truncated incorrect DOUBLE value: 'u' +Warning 1292 Truncated incorrect DOUBLE value: 'w' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'y' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 
1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'g' +Warning 1292 Truncated incorrect DOUBLE value: 'k' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'o' +Warning 1292 Truncated incorrect DOUBLE value: 'q' +Warning 1292 Truncated incorrect DOUBLE value: 'r' +Warning 1292 Truncated incorrect DOUBLE value: 'u' ALTER TABLE t1 MODIFY a DOUBLE; SELECT * FROM t1,t2 WHERE a=d; a b c pk d e @@ -84,6 +131,53 @@ Warning 1292 Truncated incorrect DOUBLE value: 'x' Warning 1292 Truncated incorrect DOUBLE value: 'x' Warning 1292 Truncated incorrect DOUBLE value: 'y' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'g' +Warning 1292 Truncated incorrect DOUBLE value: 'k' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'o' +Warning 1292 Truncated incorrect DOUBLE value: 'q' +Warning 1292 Truncated incorrect DOUBLE value: 'r' +Warning 1292 Truncated incorrect DOUBLE value: 'u' +Warning 1292 Truncated incorrect DOUBLE value: 'w' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'y' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'g' +Warning 1292 Truncated incorrect DOUBLE value: 'k' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'o' +Warning 1292 Truncated incorrect DOUBLE value: 'q' +Warning 1292 Truncated incorrect DOUBLE value: 'r' +Warning 1292 Truncated incorrect DOUBLE value: 'u' +Warning 1292 Truncated incorrect DOUBLE value: 'w' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'x' +Warning 1292 Truncated incorrect DOUBLE value: 'y' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'd' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'f' +Warning 1292 Truncated incorrect DOUBLE value: 'g' +Warning 1292 Truncated incorrect DOUBLE value: 'k' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'm' +Warning 1292 Truncated incorrect DOUBLE value: 'o' +Warning 1292 Truncated incorrect DOUBLE value: 'q' +Warning 1292 Truncated incorrect DOUBLE value: 'r' +Warning 1292 Truncated incorrect DOUBLE value: 'u' DROP TABLE t1,t2; # # End of 10.2 tests diff -Nru mariadb-10.11.11/mysql-test/main/type_varbinary.result mariadb-10.11.13/mysql-test/main/type_varbinary.result --- mariadb-10.11.11/mysql-test/main/type_varbinary.result 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-10.11.13/mysql-test/main/type_varbinary.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,42 @@ +# +# MDEV-36235 Incorrect result for BETWEEN over unique blob prefix +# +CREATE TABLE t1 (c1 VARBINARY(10), UNIQUE (c1)); +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 IGNORE KEY(c1) WHERE 'a' BETWEEN 0 AND (c1); +c1 +1 +2 +Warnings: +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +Warning 1292 Truncated incorrect DOUBLE value: 'a' +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +c1 +-1 +-2 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +SELECT c1 FROM t1 IGNORE KEY(c1) WHERE '#' BETWEEN c1 AND 0; +c1 +-2 +-1 +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +Warning 1292 Truncated incorrect DECIMAL value: '#' +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/type_varbinary.test mariadb-10.11.13/mysql-test/main/type_varbinary.test --- mariadb-10.11.11/mysql-test/main/type_varbinary.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/type_varbinary.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,10 @@ +--echo # +--echo # MDEV-36235 Incorrect result for BETWEEN over unique blob prefix +--echo # +CREATE TABLE t1 (c1 VARBINARY(10), UNIQUE (c1)); +INSERT INTO t1 (c1) VALUES (-2),(-1),(1),(2); +SELECT c1 FROM t1 WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 IGNORE KEY(c1) WHERE 'a' BETWEEN 0 AND (c1); +SELECT c1 FROM t1 WHERE '#' BETWEEN c1 AND 0; +SELECT c1 FROM t1 IGNORE KEY(c1) WHERE '#' BETWEEN c1 AND 0; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/main/update.result mariadb-10.11.13/mysql-test/main/update.result --- mariadb-10.11.11/mysql-test/main/update.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/update.result 2025-05-19 16:14:24.000000000 +0000 @@ -765,3 +765,83 @@ u xxb drop table t1; # End of MariaDB 10.4 tests +# +# MDEV-35955 Wrong result for UPDATE ... 
ORDER BY LIMIT which uses tmp.table +# +create table t1 (id int primary key, v int); +create table t2 (id int primary key, v int); +insert into t1 (id, v) values (2,3),(1,4); +insert into t2 (id, v) values (5,5),(6,6); +select t1.*, t2.* from t1, t2 order by t1.id, t2.id limit 2; +id v id v +1 4 5 5 +1 4 6 6 +UPDATE t1, t2 SET t1.v=-1, t2.v=-1 ORDER BY t1.id, t2.id LIMIT 2; +select * from t1; +id v +2 3 +1 -1 +select * from t2; +id v +5 -1 +6 -1 +drop table t1, t2; +create table t1 (id int primary key, v text) engine=myisam; +create table t2 (id int primary key, v text) engine=myisam; +insert into t1 (id, v) values (1,'b'),(2,'fo'),(3,'bar'),(4,'barr'),(5,'bazzz'); +insert into t2 (id, v) values (6,'quxqux'),(7,'foofoof'),(8,'barbarba'),(9,'quxquxqux'),(10,'bazbazbazb'); +select t1.*, t2.* from t1, t2 order by t1.id, t2.id limit 2; +id v id v +1 b 6 quxqux +1 b 7 foofoof +update t1, t2 set t1.v='DELETED', t2.v='DELETED' order by t1.id, t2.id limit 2; +select * from t1; +id v +1 DELETED +2 fo +3 bar +4 barr +5 bazzz +select * from t2; +id v +6 DELETED +7 DELETED +8 barbarba +9 quxquxqux +10 bazbazbazb +drop table t1, t2; +create table t1 (id int primary key, v int); +create table t2 (id int primary key, v int); +create table t3 (id int primary key, v int); +insert into t1 (id, v) values (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000); +insert into t2 (id, v) values (10, 100), (20, 200), (30, 300), (40, 400), (50, 500); +insert into t3 (id, v) values (11, 111), (22, 222), (33, 333), (44, 444), (55, 555); +select t1.*, t2.*, t3.* from t1, t2, t3 order by t1.id, t2.id, t3.id limit 3; +id v id v id v +1 1000 10 100 11 111 +1 1000 10 100 22 222 +1 1000 10 100 33 333 +UPDATE t1, t2, t3 SET t1.v=-1, t2.v=-2, t3.v=-3 ORDER BY t1.id, t2.id, t3.id LIMIT 3; +select * from t1; +id v +1 -1 +2 2000 +3 3000 +4 4000 +5 5000 +select * from t2; +id v +10 -2 +20 200 +30 300 +40 400 +50 500 +select * from t3; +id v +11 -3 +22 -3 +33 -3 +44 444 +55 555 +drop table t1, t2, t3; +# End of MariaDB 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/main/update.test mariadb-10.11.13/mysql-test/main/update.test --- mariadb-10.11.11/mysql-test/main/update.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/update.test 2025-05-19 16:14:24.000000000 +0000 @@ -707,3 +707,43 @@ drop table t1; --echo # End of MariaDB 10.4 tests + +--echo # +--echo # MDEV-35955 Wrong result for UPDATE ... 
ORDER BY LIMIT which uses tmp.table +--echo # + +create table t1 (id int primary key, v int); +create table t2 (id int primary key, v int); +insert into t1 (id, v) values (2,3),(1,4); +insert into t2 (id, v) values (5,5),(6,6); +select t1.*, t2.* from t1, t2 order by t1.id, t2.id limit 2; +UPDATE t1, t2 SET t1.v=-1, t2.v=-1 ORDER BY t1.id, t2.id LIMIT 2; +select * from t1; +select * from t2; + +drop table t1, t2; +create table t1 (id int primary key, v text) engine=myisam; +create table t2 (id int primary key, v text) engine=myisam; +insert into t1 (id, v) values (1,'b'),(2,'fo'),(3,'bar'),(4,'barr'),(5,'bazzz'); +insert into t2 (id, v) values (6,'quxqux'),(7,'foofoof'),(8,'barbarba'),(9,'quxquxqux'),(10,'bazbazbazb'); +select t1.*, t2.* from t1, t2 order by t1.id, t2.id limit 2; +update t1, t2 set t1.v='DELETED', t2.v='DELETED' order by t1.id, t2.id limit 2; +select * from t1; +select * from t2; + +drop table t1, t2; +create table t1 (id int primary key, v int); +create table t2 (id int primary key, v int); +create table t3 (id int primary key, v int); +insert into t1 (id, v) values (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000); +insert into t2 (id, v) values (10, 100), (20, 200), (30, 300), (40, 400), (50, 500); +insert into t3 (id, v) values (11, 111), (22, 222), (33, 333), (44, 444), (55, 555); +select t1.*, t2.*, t3.* from t1, t2, t3 order by t1.id, t2.id, t3.id limit 3; +UPDATE t1, t2, t3 SET t1.v=-1, t2.v=-2, t3.v=-3 ORDER BY t1.id, t2.id, t3.id LIMIT 3; +select * from t1; +select * from t2; +select * from t3; + +drop table t1, t2, t3; + +--echo # End of MariaDB 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/main/userstat.result mariadb-10.11.13/mysql-test/main/userstat.result --- mariadb-10.11.11/mysql-test/main/userstat.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/userstat.result 2025-05-19 16:14:24.000000000 +0000 @@ -247,6 +247,11 @@ ERROR 21000: Subquery returns more than 1 row set global userstat= 0; drop function f; -# # End of 10.2 tests # +# MDEV-36586 USER_STATISTICS.BUSY_TIME is in microseconds +# +select distinct busy_time>1e5, cpu_time>1e5 from information_schema.user_statistics; +busy_time>1e5 cpu_time>1e5 +0 0 +# End of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/main/userstat.test mariadb-10.11.13/mysql-test/main/userstat.test --- mariadb-10.11.11/mysql-test/main/userstat.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/userstat.test 2025-05-19 16:14:24.000000000 +0000 @@ -135,6 +135,11 @@ drop function f; --enable_ps2_protocol ---echo # --echo # End of 10.2 tests + --echo # +--echo # MDEV-36586 USER_STATISTICS.BUSY_TIME is in microseconds +--echo # +select distinct busy_time>1e5, cpu_time>1e5 from information_schema.user_statistics; + +--echo # End of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/main/view.result mariadb-10.11.13/mysql-test/main/view.result --- mariadb-10.11.11/mysql-test/main/view.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/view.result 2025-05-19 16:14:24.000000000 +0000 @@ -944,31 +944,19 @@ create view v2 as select * from v1; create view v3 as select v2.col1 from v2,t2 where v2.col1 = t2.col1; insert into v2 values ((select max(col1) from v1)); -ERROR HY000: The definition of table 'v1' prevents operation INSERT on table 'v2' insert into t1 values ((select max(col1) from v1)); -ERROR HY000: The definition of table 'v1' prevents operation INSERT on table 't1' insert into v2 values ((select max(col1) from v1)); -ERROR HY000: The 
definition of table 'v1' prevents operation INSERT on table 'v2' insert into v2 values ((select max(col1) from t1)); -ERROR HY000: The definition of table 'v2' prevents operation INSERT on table 'v2' insert into t1 values ((select max(col1) from t1)); -ERROR HY000: Table 't1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into v2 values ((select max(col1) from t1)); -ERROR HY000: The definition of table 'v2' prevents operation INSERT on table 'v2' insert into v2 values ((select max(col1) from v2)); -ERROR HY000: Table 'v2' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into t1 values ((select max(col1) from v2)); -ERROR HY000: The definition of table 'v2' prevents operation INSERT on table 't1' insert into v2 values ((select max(col1) from v2)); -ERROR HY000: Table 'v2' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into v3 (col1) values ((select max(col1) from v1)); -ERROR HY000: The definition of table 'v1' prevents operation INSERT on table 'v3' insert into v3 (col1) values ((select max(col1) from t1)); -ERROR HY000: The definition of table 'v3' prevents operation INSERT on table 'v3' insert into v3 (col1) values ((select max(col1) from v2)); -ERROR HY000: The definition of table 'v2' prevents operation INSERT on table 'v3' -insert into v3 (col1) values ((select CONVERT_TZ('20050101000000','UTC','MET') from v2)); -ERROR HY000: The definition of table 'v2' prevents operation INSERT on table 'v3' +insert into v3 (col1) values ((select CONVERT_TZ('20050101000000','UTC','MET') from v2 LIMIT 1)); +ERROR 22003: Out of range value for column 'col1' at row 3 insert into v3 (col1) values ((select CONVERT_TZ('20050101000000','UTC','MET') from t2)); insert into t3 values ((select CONVERT_TZ('20050101000000','UTC','MET') from t2)); ERROR 23000: Column 'col1' cannot be null @@ -978,6 +966,18 @@ select * from t1; col1 NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL 1 2 3 @@ -1332,9 +1332,26 @@ insert into v3 values (30); ERROR HY000: The target table v3 of the INSERT is not insertable-into create view v4 as select * from v2 where 20 < (select (s1) from t1); +select * from t1; +s1 insert into v4 values (30); -ERROR HY000: The target table v4 of the INSERT is not insertable-into -drop view v4, v3, v2, v1; +select * from t1; +s1 +30 +create view v5 as select * from v2 where s1 < (select min(s1) from t1) WITH CHECK OPTION; +# can insert only values less than the minimum +insert into v5 values (40); +ERROR 44000: CHECK OPTION failed `test`.`v5` +# allow inserting a new minimum +insert into v5 values (10); +# always empty view (nothing can be less than the minimum) +select * from v5; +s1 +select * from t1; +s1 +30 +10 +drop view v5, v4, v3, v2, v1; drop table t1; create table t1 (a int); create view v1 as select * from t1; diff -Nru mariadb-10.11.11/mysql-test/main/view.test mariadb-10.11.13/mysql-test/main/view.test --- mariadb-10.11.11/mysql-test/main/view.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/view.test 2025-05-19 16:14:24.000000000 +0000 @@ -866,33 +866,21 @@ create view v1 as select * from t1; create view v2 as select * from v1; create view v3 as select v2.col1 from v2,t2 where v2.col1 = t2.col1; --- error ER_VIEW_PREVENT_UPDATE insert into v2 values ((select max(col1) from v1)); --- error ER_VIEW_PREVENT_UPDATE insert into t1 values ((select max(col1) from v1)); --- error ER_VIEW_PREVENT_UPDATE insert into v2 
values ((select max(col1) from v1)); --- error ER_VIEW_PREVENT_UPDATE insert into v2 values ((select max(col1) from t1)); --- error ER_UPDATE_TABLE_USED insert into t1 values ((select max(col1) from t1)); --- error ER_VIEW_PREVENT_UPDATE insert into v2 values ((select max(col1) from t1)); --- error ER_UPDATE_TABLE_USED insert into v2 values ((select max(col1) from v2)); --- error ER_VIEW_PREVENT_UPDATE insert into t1 values ((select max(col1) from v2)); --- error ER_UPDATE_TABLE_USED insert into v2 values ((select max(col1) from v2)); --- error ER_VIEW_PREVENT_UPDATE insert into v3 (col1) values ((select max(col1) from v1)); --- error ER_VIEW_PREVENT_UPDATE insert into v3 (col1) values ((select max(col1) from t1)); --- error ER_VIEW_PREVENT_UPDATE insert into v3 (col1) values ((select max(col1) from v2)); # check with TZ tables in list --- error ER_VIEW_PREVENT_UPDATE -insert into v3 (col1) values ((select CONVERT_TZ('20050101000000','UTC','MET') from v2)); +--error ER_WARN_DATA_OUT_OF_RANGE +insert into v3 (col1) values ((select CONVERT_TZ('20050101000000','UTC','MET') from v2 LIMIT 1)); insert into v3 (col1) values ((select CONVERT_TZ('20050101000000','UTC','MET') from t2)); -- error ER_BAD_NULL_ERROR insert into t3 values ((select CONVERT_TZ('20050101000000','UTC','MET') from t2)); @@ -1210,9 +1198,19 @@ -- error ER_NON_INSERTABLE_TABLE insert into v3 values (30); create view v4 as select * from v2 where 20 < (select (s1) from t1); --- error ER_NON_INSERTABLE_TABLE +select * from t1; insert into v4 values (30); -drop view v4, v3, v2, v1; +select * from t1; +create view v5 as select * from v2 where s1 < (select min(s1) from t1) WITH CHECK OPTION; +--echo # can insert only values less than the minimum +--error ER_VIEW_CHECK_FAILED +insert into v5 values (40); +--echo # allow inserting a new minimum +insert into v5 values (10); +--echo # always empty view (nothing can be less than the minimum) +select * from v5; +select * from t1; +drop view v5, v4, v3, v2, v1; drop table t1; # diff -Nru mariadb-10.11.11/mysql-test/main/view_grant.result mariadb-10.11.13/mysql-test/main/view_grant.result --- mariadb-10.11.11/mysql-test/main/view_grant.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/view_grant.result 2025-05-19 16:14:24.000000000 +0000 @@ -1982,6 +1982,52 @@ DROP VIEW v1; DROP USER foo; DROP USER FOO; +# +# MDEV-36380: User has unauthorized access to a sequence through +# a view with security invoker +# +create database db; +use db; +create sequence s; +create sql security invoker view vin as select nextval(s); +create sql security definer view vdn as select nextval(s); +create sql security invoker view vil as select lastval(s); +create sql security definer view vdl as select lastval(s); +create sql security invoker view vis as select setval(s,20); +create sql security definer view vds as select setval(s,30); +create user u@localhost; +grant select on db.vin to u@localhost; +grant select on db.vdn to u@localhost; +grant select on db.vil to u@localhost; +grant select on db.vdl to u@localhost; +grant select on db.vis to u@localhost; +grant select on db.vds to u@localhost; +connect con1,localhost,u,,db; +select nextval(s); +ERROR 42000: SELECT, INSERT command denied to user 'u'@'localhost' for table `db`.`s` +select * from vin; +ERROR HY000: View 'db.vin' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them +select * from vdn; +nextval(s) +1 +select lastval(s); +ERROR 42000: SELECT command denied to user 
'u'@'localhost' for table `db`.`s` +select * from vil; +ERROR HY000: View 'db.vil' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them +select * from vdl; +lastval(s) +1 +select setval(s,10); +ERROR 42000: INSERT command denied to user 'u'@'localhost' for table `db`.`s` +select * from vis; +ERROR HY000: View 'db.vis' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them +select * from vds; +setval(s,30) +30 +disconnect con1; +connection default; +drop database db; +drop user u@localhost; # End of 10.5 tests # Check that a user without access to the schema 'foo' cannot query # a JSON_TABLE view in that schema. diff -Nru mariadb-10.11.11/mysql-test/main/view_grant.test mariadb-10.11.13/mysql-test/main/view_grant.test --- mariadb-10.11.11/mysql-test/main/view_grant.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/main/view_grant.test 2025-05-19 16:14:24.000000000 +0000 @@ -2237,6 +2237,53 @@ DROP USER foo; DROP USER FOO; +--echo # +--echo # MDEV-36380: User has unauthorized access to a sequence through +--echo # a view with security invoker +--echo # +create database db; +use db; +create sequence s; +create sql security invoker view vin as select nextval(s); +create sql security definer view vdn as select nextval(s); +create sql security invoker view vil as select lastval(s); +create sql security definer view vdl as select lastval(s); +create sql security invoker view vis as select setval(s,20); +create sql security definer view vds as select setval(s,30); +create user u@localhost; +grant select on db.vin to u@localhost; +grant select on db.vdn to u@localhost; +grant select on db.vil to u@localhost; +grant select on db.vdl to u@localhost; +grant select on db.vis to u@localhost; +grant select on db.vds to u@localhost; + +--connect (con1,localhost,u,,db) +--error ER_TABLEACCESS_DENIED_ERROR +select nextval(s); +--error ER_VIEW_INVALID +select * from vin; +--disable_ps2_protocol +select * from vdn; +--enable_ps2_protocol + +--error ER_TABLEACCESS_DENIED_ERROR +select lastval(s); +--error ER_VIEW_INVALID +select * from vil; +select * from vdl; + +--error ER_TABLEACCESS_DENIED_ERROR +select setval(s,10); +--error ER_VIEW_INVALID +select * from vis; +select * from vds; + +--disconnect con1 +--connection default +drop database db; +drop user u@localhost; + --echo # End of 10.5 tests --echo # Check that a user without access to the schema 'foo' cannot query diff -Nru mariadb-10.11.11/mysql-test/mariadb-test-run.pl mariadb-10.11.13/mysql-test/mariadb-test-run.pl --- mariadb-10.11.11/mysql-test/mariadb-test-run.pl 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/mariadb-test-run.pl 2025-05-19 16:14:24.000000000 +0000 @@ -130,6 +130,8 @@ our $path_current_testlog; our $path_testlog; +our $opt_open_files_limit; + our $default_vardir; our $opt_vardir; # Path to use for var/ dir our $plugindir; @@ -268,6 +270,9 @@ our $opt_skip_not_found= 0; our $opt_mem= $ENV{'MTR_MEM'}; our $opt_clean_vardir= $ENV{'MTR_CLEAN_VARDIR'}; +our $opt_catalogs= 0; +our $opt_catalog_name=""; +our $catalog_name="def"; our $opt_gcov; our $opt_gprof; @@ -1274,6 +1279,7 @@ 'list-options' => \$opt_list_options, 'skip-test-list=s' => \@opt_skip_test_list, 'xml-report=s' => \$opt_xml_report, + 'open-files-limit=i', => \$opt_open_files_limit, My::Debugger::options(), My::CoreDump::options(), @@ -2223,6 +2229,9 @@ { $ENV{'MYSQL_INSTALL_DB_EXE'}= 
mtr_exe_exists("$bindir/sql$multiconfig/mariadb-install-db", "$bindir/bin/mariadb-install-db"); + $ENV{'MARIADB_UPGRADE_SERVICE_EXE'}= mtr_exe_exists("$bindir/sql$multiconfig/mariadb-upgrade-service", + "$bindir/bin/mariadb-upgrade-service"); + $ENV{'MARIADB_UPGRADE_EXE'}= mtr_exe_exists("$path_client_bindir/mariadb-upgrade"); } my $client_config_exe= @@ -3945,6 +3954,23 @@ } } + # Set up things for catalogs + # The values of MARIADB_TOPDIR and MARIADB_DATADIR should + # be taken from the values used by the default (first) + # connection that is used by mariadb-test. + my ($mysqld, @servers); + @servers= all_servers(); + $mysqld= $servers[0]; + $ENV{'MARIADB_TOPDIR'}= $mysqld->value('datadir'); + if (!$opt_catalogs) + { + $ENV{'MARIADB_DATADIR'}= $mysqld->value('datadir'); + } + else + { + $ENV{'MARIADB_DATADIR'}= $mysqld->value('datadir') . "/" . $catalog_name; + } + # Write start of testcase to log mark_log($path_current_testlog, $tinfo); @@ -4458,14 +4484,13 @@ ( @global_suppressions, qr/error .*connecting to master/, - qr/InnoDB: Error: in ALTER TABLE `test`.`t[12]`/, - qr/InnoDB: Error: table `test`.`t[12]` .*does not exist in the InnoDB internal/, - qr/InnoDB: Warning: a long semaphore wait:/, qr/InnoDB: Dumping buffer pool.*/, qr/InnoDB: Buffer pool.*/, qr/InnoDB: Could not free any blocks in the buffer pool!/, - qr/InnoDB: Warning: Writer thread is waiting this semaphore:/, qr/InnoDB: innodb_open_files .* should not be greater than/, + qr/InnoDB: Trying to delete tablespace.*but there are.*pending/, + qr/InnoDB: Tablespace 1[0-9]* was not found at .*, and innodb_force_recovery was set/, + qr/InnoDB: Long wait \([0-9]+ seconds\) for double-write buffer flush/, qr/Slave: Unknown table 't1' .* 1051/, qr/Slave SQL:.*(Internal MariaDB error code: [[:digit:]]+|Query:.*)/, qr/slave SQL thread aborted/, @@ -5745,6 +5770,7 @@ append => 1, error => $path_current_testlog, verbose => $opt_verbose, + open_files_limit => $opt_open_files_limit, ); mtr_verbose("Started $proc"); return $proc; @@ -6043,6 +6069,8 @@ timediff With --timestamp, also print time passed since *previous* test started max-connections=N Max number of open connection to server in mysqltest + open-files-limit=N Max number of open files allowed for any of the children + of my_safe_process. Default is 1024. report-times Report how much time has been spent on different phases of test execution. 
stress=ARGS Run stress test, providing options to diff -Nru mariadb-10.11.11/mysql-test/std_data/galera_certs/galera.root.crt mariadb-10.11.13/mysql-test/std_data/galera_certs/galera.root.crt --- mariadb-10.11.11/mysql-test/std_data/galera_certs/galera.root.crt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/std_data/galera_certs/galera.root.crt 2025-05-19 16:14:24.000000000 +0000 @@ -2,7 +2,7 @@ MIIFlTCCA32gAwIBAgIUKCF88W+48rZzdfgYpE2dXVMGSKgwDQYJKoZIhvcNAQEL BQAwWjELMAkGA1UEBhMCRkkxETAPBgNVBAgMCEhlbHNpbmtpMREwDwYDVQQHDAhI ZWxzaW5raTEPMA0GA1UECgwGR2FsZXJhMRQwEgYDVQQDDAtnYWxlcmEucm9vdDAe -Fw0yMTAyMDQxMzE3MDJaFw0yMzExMjUxMzE3MDJaMFoxCzAJBgNVBAYTAkZJMREw +Fw0yMzEyMDExMzQzNDBaFw0zMzExMjgxMzQzNDBaMFoxCzAJBgNVBAYTAkZJMREw DwYDVQQIDAhIZWxzaW5raTERMA8GA1UEBwwISGVsc2lua2kxDzANBgNVBAoMBkdh bGVyYTEUMBIGA1UEAwwLZ2FsZXJhLnJvb3QwggIiMA0GCSqGSIb3DQEBAQUAA4IC DwAwggIKAoICAQDKqL45jbaq8RLOj+DeilPcEnBN5gn/y9V3IfZ0BQCd4bR09zLz @@ -18,15 +18,15 @@ F+XZTdTiaOWPEmvFFGLLUQxKl4w872hJaupqfteqdiZ+3ICVIUI8qnXHmwIDAQAB o1MwUTAdBgNVHQ4EFgQUs75v/MgjJ5RHGE6+0qdiVo4BwlowHwYDVR0jBBgwFoAU s75v/MgjJ5RHGE6+0qdiVo4BwlowDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0B -AQsFAAOCAgEAOVhBs28dwwvD5q2r7oVVcxLc+tb8zu4XxpXT1p6hiZYUyPguCh00 -GVdXCgR4JMI/NcyM5fBAbF3S8oK3+9rw2kW09afVV06Qf/8o3nIyOiDl7598tGIP -CCK4QsUW/dGajx5kvhtQ7qce+u9KfFTof6lq2xkYtFBBhmBdSv9A1jAZJMw2x3bc -nr99PS8XZMphS0MIExHKj6Ry5DdYm722zZHyIEiiEGyMViDm2m1iug5r/LPH5Z56 -BjQiH4VP+0y5mevBOUGuH8ID+J9Hu9BeoXLhkv+W2Ljs/S6wqzjinMBqVG+wwe0Y -a8F5pABkl5uX38nMQ7CikSbLxSbn7nRf+sux1sbzqjMldeCSqiv9mI5Ysq97+Ni1 -5qMxNxNc0u/wGRnrXH8fWfxBKPP5moA7DQfVcUWPgDGQwDpA8kn8RlJxFk3g4yaK -+NMwk5MORKyx3tz/A3Yhs9AUXk3okvmQCT2YVSHcKUB8PAU+TaKqbr3wk07Y/tL/ -jFPHS+t3eD91Y05KGUXjdtGi+33zpV0biHmTWAZT78VQowDNvEpTnXhkSx8HGHYR -nqSMU2m2LboHSatY113RYznx0LJ1azczRlJdGs8oyPWLPDD2JCesZaQqGZVRJoms -lK4EzYEb5mZTCRgtgoiO+iKcf6XifuOCrWZXoLm4FlLEfOQ3b8yAFlo= +AQsFAAOCAgEAKLV6mkWb88HEJXo1XlmAzznIYNfilrvvxwcjhceluDE8s8sPSpYM +Bz5ebWlHCgEkC/ezhA/PDtZsZlQKwv4jb++lAlFSlebT1GW77xKkdRBTKgkFAaOA +pF5eZao6IP8l76fA4OoI2Tttw5jeb23kOoklDp/8VS0JEAT3wm/hZiE20aUbAFC+ +kPiCucBztzaTHQud9CgtxRH/B3D9FaPuwae/H6FYrvQVNVjcaHTIUh9fTcyKRXYm +oYbvK7fIhCjZkG2LRWRU9Kirivb+ktO4POsuK4BgYrsFaOBf9HYsojA7llyGDopN +cfw9jtb27Qb/uMKJnClFg14u685CU5JAzY31E5OQPPUUx9PqP4Z9PgXRQ0xI6H/4 +sejlcQuqGCDKiL2lOzUjbT86EjO4ZfiKHR+lKOIuT5mXiR8cbS1JeyX3Mrv1Ds4r +UVcdtSXTy6/XYWFIzhu+MrsFon6VX0HkmSH1HjSoLMOZcHAZIFZZ/uAahLmMNaEG +lV15fD5+t5QRKwqmdFUW2ETiqSJxRs6Y++ptxpiiH38QVWPvBWeRgcPpf3A478Bl +iGO0xn0N57TnhFs3g0C0xyZgTBMozfVostYpps1Tqqz0VOhtmURxTZm9JZgTb7qv +nMURY0SIQKXpHCcJuNtxZcDSu8uxgUcMsLSSC7Zmk7/cSeUfmOgZVzU= -----END CERTIFICATE----- diff -Nru mariadb-10.11.11/mysql-test/suite/archive/archive-big.test mariadb-10.11.13/mysql-test/suite/archive/archive-big.test --- mariadb-10.11.11/mysql-test/suite/archive/archive-big.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/archive/archive-big.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,7 @@ --source include/big_test.inc -# Valgrind is to slow for this test +# Valgrind and msan are too slow for this test --source include/not_valgrind.inc +--source include/not_msan.inc --source include/have_archive.inc CREATE TABLE t1(a BLOB) ENGINE=ARCHIVE; --disable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/README.txt mariadb-10.11.13/mysql-test/suite/atomic/README.txt --- mariadb-10.11.11/mysql-test/suite/atomic/README.txt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/README.txt 2025-05-19 16:14:24.000000000 +0000 @@ -3,7 +3,7 @@ - Add # 
before --exec echo "restart" ... - Force $e (engine), $c (crash point) and $r (crash position) to the values - where things goes wrong. See comments in alter_table.test for how to do this. + where things go wrong. See comments in alter_table.inc for how to do this. - start mariadbd in a debugger run the following in the debugger diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table.inc mariadb-10.11.13/mysql-test/suite/atomic/alter_table.inc --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table.inc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table.inc 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,198 @@ +--source include/long_test.inc +--source include/have_debug.inc +--source include/have_log_bin.inc + +if (!$BIG_TEST) +{ + --source include/not_valgrind.inc + --source include/not_msan.inc +} + +# +# Testing of atomic ALTER TABLE with crashes in a lot of different places +# +# Things tested: +# With myisam and InnoDB engines to ensure that we cover both normal and +# online alter table paths. +# Alter table with new columns +# Alter table which only touches .frm +# Alter table disable keys (has its own code path) +# Alter table with rename +# Alter table with rename and only options that touch .frm +# Alter table with rename and add new columns +# Alter table with storage engine change (with and without column definition +# changes) +# Alter table with storage engine change and rename +# Alter table to another database + +--disable_query_log +call mtr.add_suppression("InnoDB: .* does not exist in the InnoDB internal"); +# Speed up wait_until_connected_again.inc +let NO_WSREP=1; +--enable_query_log +let $MYSQLD_DATADIR= `SELECT @@datadir`; + +create database test2; +RESET MASTER; + +if ($engine_count == "") +{ + let $engine_count=2; + let $engines='myisam','innodb'; +} +if ($extra_engine == "") +{ + let $extra_engine=aria; +} + +let $crash_count=13; +let $crash_points='ddl_log_alter_after_create_frm', 'ddl_log_alter_after_create_table', 'ddl_log_alter_after_prepare_inplace','ddl_log_alter_after_copy', 'ddl_log_alter_after_log', 'ddl_log_alter_after_rename_to_backup', 'ddl_log_alter_after_rename_to_backup_log', 'ddl_log_alter_rename_frm', 'ddl_log_alter_after_rename_to_original', 'ddl_log_alter_before_rename_triggers', 'ddl_log_alter_after_rename_triggers', 'ddl_log_alter_after_delete_backup', 'ddl_log_alter_after_drop_original_table'; + +let $statement_count=16; +let $statements='ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new"', 'ALTER TABLE t1 COMMENT "new"', 'ALTER TABLE t1 change column a c int COMMENT "new"', 'ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2', 'ALTER TABLE t1 disable keys', 'ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new"', 'ALTER TABLE t1 rename t2', 'ALTER TABLE t1 COMMENT "new", rename t2', 'ALTER TABLE t1 change column a c int COMMENT "new", rename t2', 'ALTER TABLE t1 ENGINE=$extra_engine, COMMENT "new"', 'ALTER TABLE t1 change column a c int COMMENT "new", engine=$extra_engine', 'ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=$extra_engine', 'ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2', 'ALTER TABLE t1 COMMENT "new", rename test2.t2', 'ALTER TABLE t1 ADD key(b), COMMENT "new"', 'ALTER TABLE t1 DROP INDEX a'; + +# If there is a need to test one specific state (crash point and query), +# one can use the comments below to execute one specific test combination +#let $crash_count=1; +#let 
$crash_points='ddl_log_alter_after_create_frm'; #let $statement_count= 1; #let $statements='ALTER TABLE t1 ADD COLUMN c int, COMMENT "new"'; #let $engine_count=1; #let $engines='rocksdb'; #--source include/have_rocksdb.inc + +let $old_debug=`select @@debug_dbug`; +let $e=0; +let $keep_include_silent=1; +let $grep_script=ALTER; +--disable_query_log + +while ($e < $engine_count) +{ + inc $e; + let $engine=`select ELT($e, $engines)`; + let $default_engine=$engine; + + --echo + --echo engine: $engine + --echo + + let $r=0; + while ($r < $statement_count) + { + inc $r; + let $statement=`select ELT($r, $statements)`; + --echo + --echo query: $statement + --echo + let $c=0; + while ($c < $crash_count) + { + inc $c; + let $crash=`select ELT($c, $crash_points)`; + + --eval create table t1 (a int, b int, key(a)) engine=$engine + insert into t1 values (1,1),(2,2); + commit; + flush tables; + + FLUSH BINARY LOGS; + --let $start_binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + --echo crash point: $crash + if ($crash_count > 1) + { + --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect + } +# The following can be used for testing one specific failure +# if ($crash == "ddl_log_alter_after_log") +# { +# if ($r == 2) +# { +# --remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +# } +# } + --disable_reconnect + --eval set @@debug_dbug="+d,$crash",@debug_crash_counter=1 + let $errno=0; + --error 0,2013 + --eval $statement; + let $error=$errno; + --enable_reconnect + --source include/wait_until_connected_again.inc + --disable_query_log + --eval set @@debug_dbug="$old_debug" + + if ($error == 0) + { + echo "No crash!"; + } + if ($error != 0) + { + --list_files $MYSQLD_DATADIR/test t* + --list_files $MYSQLD_DATADIR/test *sql* + --list_files $MYSQLD_DATADIR/test2 t* + --list_files $MYSQLD_DATADIR/test2 *sql* + # Check which tables still exist + --error 0,1 + --file_exists $MYSQLD_DATADIR/test/t1.frm + let $error2=$errno; + if ($error2 == 0) + { + show create table t1; + select count(*) from t1; + } + if ($error2 == 1) + { + --error 0,1 + --file_exists $MYSQLD_DATADIR/test/t2.frm + let $error3=$errno; + if ($error3 == 0) + { + show create table t2; + select count(*) from t2; + } + if ($error3 == 1) + { + --echo "Table is in test2" + show create table test2.t2; + select count(*) from test2.t2; + } + } + --let $binlog_file=$start_binlog_file + --let $binlog_output_name=master-bin.000001 + + --source include/show_binlog_events.inc + if ($error) + { + --let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + --let $binlog_output_name=master-bin.000002 + if ($binlog_file != $start_binlog_file) + { + --source include/show_binlog_events.inc + } + } + } + --disable_warnings + drop table if exists t1,t2; + drop table if exists test2.t2; + --enable_warnings + } + } +} +drop database test2; +--enable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table.opt mariadb-10.11.13/mysql-test/suite/atomic/alter_table.opt --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ ---innodb-max-dirty-pages-pct=0 diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table.result mariadb-10.11.13/mysql-test/suite/atomic/alter_table.result --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table.result 1970-01-01 00:00:00.000000000 +0000 
@@ -1,3135 +0,0 @@
[3135-line mysqltest .result file deleted; the flattened hunk is condensed below]
The removed file opened with "create database test2;" and "RESET MASTER;", then recorded,
per storage engine, the post-crash recovery outcome of ALTER TABLE at thirteen DDL-log
crash points: ddl_log_alter_after_create_frm, _after_create_table, _after_prepare_inplace,
_after_copy, _after_log, _after_rename_to_backup, _after_rename_to_backup_log,
_rename_frm, _after_rename_to_original, _before_rename_triggers, _after_rename_triggers,
_after_delete_backup and _after_drop_original_table.

engine: myisam — queries exercised:
  ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new"
  ALTER TABLE t1 COMMENT "new"
  ALTER TABLE t1 change column a c int COMMENT "new"
  ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2
  ALTER TABLE t1 disable keys
  ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new"
  ALTER TABLE t1 rename t2
  ALTER TABLE t1 COMMENT "new", rename t2
  ALTER TABLE t1 change column a c int COMMENT "new", rename t2
  ALTER TABLE t1 ENGINE=aria, COMMENT "new"
  ALTER TABLE t1 change column a c int COMMENT "new", engine=aria
  ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria
  ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2
  ALTER TABLE t1 COMMENT "new", rename test2.t2
  ALTER TABLE t1 ADD key(b), COMMENT "new"
  ALTER TABLE t1 DROP INDEX a
For every query/crash-point pair the file showed either "No crash!" (the point is not
reached by that statement) or the state left after recovery: the surviving table files
(t1.MYD, t1.MYI, t1.frm; t2.* after a rename; .MAD/.MAI once converted to Aria; the
marker "Table is in test2" for renames into the second database), the recovered
SHOW CREATE TABLE definition, count(*) (always 2, so no rows were lost), and, once the
crash point lies past the statement's commit, the binlog entry
  master-bin.000002 # Query # # use `test`; <the ALTER statement>
Crash points before the commit recover the original definition; from the commit point
onward (ddl_log_alter_after_copy or ddl_log_alter_after_log, depending on the statement)
recovery rolls forward to the new definition, name and engine.

engine: innodb — the same crash-point matrix repeats with files t1.frm and t1.ibd.
First query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new"; handled in place, so only
ddl_log_alter_after_log leaves the new definition (plus the binlog entry), while the
backup/rename crash points report "No crash!".
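These matrices are produced by crashing the server deliberately at each named DBUG
injection point and checking what DDL-log recovery leaves behind. A minimal sketch of
the pattern in mysqltest form — assuming a debug build with these crash points compiled
in; this is an illustration of the technique, not the removed test's actual driver:

  --source include/have_debug.inc
  # tell mysql-test-run to restart the server after the intentional crash
  --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
  SET debug_dbug="+d,ddl_log_alter_after_copy";
  # the client connection dies with the server (2013 = lost connection)
  --error 2013
  ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new";
  --source include/wait_until_connected_again.inc
  SHOW CREATE TABLE t1;     # which definition did recovery keep?
  SELECT count(*) FROM t1;  # expect 2: no rows lost either way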
engine: innodb, continued — queries:
  ALTER TABLE t1 COMMENT "new"
  ALTER TABLE t1 change column a c int COMMENT "new"
  ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2
  ALTER TABLE t1 disable keys
For the first three, crash points up to ddl_log_alter_after_copy recover the original
t1; ddl_log_alter_after_log leaves the new definition (renamed to t2 where requested)
together with the binlog entry, and the trigger-rename points for the renaming
statement show the completed t2. All other backup/rename points report "No crash!".
"ALTER TABLE t1 disable keys" reports "No crash!" at every point, each time with:
  Warnings:
  Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option
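The all-"No crash!" rows for "ALTER TABLE t1 disable keys" have a simple cause: InnoDB
does not implement DISABLE KEYS, so the statement degenerates to a no-op that never
reaches the DDL log, and only the warning above is raised. This is easy to confirm
directly on any InnoDB table:

  ALTER TABLE t1 DISABLE KEYS;
  SHOW WARNINGS;
  # Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option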
Remaining queries in the InnoDB section of the removed file:
  ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new"
  ALTER TABLE t1 rename t2
  ALTER TABLE t1 COMMENT "new", rename t2
  ALTER TABLE t1 change column a c int COMMENT "new", rename t2
  ALTER TABLE t1 ENGINE=aria, COMMENT "new"
With ALGORITHM=copy the full copy path is exercised: the copy-path crash points from
ddl_log_alter_after_log through _after_drop_original_table all leave the new t1
definition and the binlog entry (points the copy path does not use report "No crash!"),
while earlier points recover the original. The bare rename reaches only the
trigger-rename points, after which recovery restores the original t1; the
rename-plus-change statements cut over to t2 at ddl_log_alter_after_log. The
ENGINE=aria conversion switches the files to t1.MAD, t1.MAI and t1.frm from
ddl_log_alter_after_log onward, with ENGINE=Aria ... PAGE_CHECKSUM=1 COMMENT='new' in
the recovered definition; the removed file continues in this pattern for its remaining
crash points and queries.
-crash point: ddl_log_alter_after_delete_backup -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" -crash point: ddl_log_alter_after_drop_original_table -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" - -query: ALTER TABLE t1 change column a c int COMMENT "new", engine=aria - -crash point: ddl_log_alter_after_create_frm -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_create_table -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_prepare_inplace -"No crash!" -crash point: ddl_log_alter_after_copy -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_log -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `c` int(11) DEFAULT NULL COMMENT 'new', - `b` int(11) DEFAULT NULL, - KEY `a` (`c`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria -crash point: ddl_log_alter_after_rename_to_backup -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `c` int(11) DEFAULT NULL COMMENT 'new', - `b` int(11) DEFAULT NULL, - KEY `a` (`c`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria -crash point: ddl_log_alter_after_rename_to_backup_log -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `c` int(11) DEFAULT NULL COMMENT 'new', - `b` int(11) DEFAULT NULL, - KEY `a` (`c`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria -crash point: ddl_log_alter_rename_frm -"No crash!" -crash point: ddl_log_alter_after_rename_to_original -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `c` int(11) DEFAULT NULL COMMENT 'new', - `b` int(11) DEFAULT NULL, - KEY `a` (`c`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria -crash point: ddl_log_alter_before_rename_triggers -"No crash!" -crash point: ddl_log_alter_after_rename_triggers -"No crash!" 
-crash point: ddl_log_alter_after_delete_backup -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `c` int(11) DEFAULT NULL COMMENT 'new', - `b` int(11) DEFAULT NULL, - KEY `a` (`c`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria -crash point: ddl_log_alter_after_drop_original_table -t1.MAD -t1.MAI -t1.frm -Table Create Table -t1 CREATE TABLE `t1` ( - `c` int(11) DEFAULT NULL COMMENT 'new', - `b` int(11) DEFAULT NULL, - KEY `a` (`c`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria - -query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria - -crash point: ddl_log_alter_after_create_frm -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_create_table -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_prepare_inplace -"No crash!" -crash point: ddl_log_alter_after_copy -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_log -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria -crash point: ddl_log_alter_after_rename_to_backup -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria -crash point: ddl_log_alter_after_rename_to_backup_log -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria -crash point: ddl_log_alter_rename_frm -"No crash!" 
-crash point: ddl_log_alter_after_rename_to_original -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria -crash point: ddl_log_alter_before_rename_triggers -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria -crash point: ddl_log_alter_after_rename_triggers -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria -crash point: ddl_log_alter_after_delete_backup -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria -crash point: ddl_log_alter_after_drop_original_table -t2.MAD -t2.MAI -t2.frm -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria - -query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 - -crash point: ddl_log_alter_after_create_frm -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_create_table -"No crash!" 
-crash point: ddl_log_alter_after_prepare_inplace -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_copy -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_log -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 -crash point: ddl_log_alter_after_rename_to_backup -"No crash!" -crash point: ddl_log_alter_after_rename_to_backup_log -"No crash!" -crash point: ddl_log_alter_rename_frm -"No crash!" -crash point: ddl_log_alter_after_rename_to_original -"No crash!" -crash point: ddl_log_alter_before_rename_triggers -t2.frm -t2.ibd -"Table is in test2" -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 -crash point: ddl_log_alter_after_rename_triggers -t2.frm -t2.ibd -"Table is in test2" -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - `c` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 -crash point: ddl_log_alter_after_delete_backup -"No crash!" -crash point: ddl_log_alter_after_drop_original_table -"No crash!" - -query: ALTER TABLE t1 COMMENT "new", rename test2.t2 - -crash point: ddl_log_alter_after_create_frm -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_create_table -"No crash!" -crash point: ddl_log_alter_after_prepare_inplace -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_copy -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_log -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 -crash point: ddl_log_alter_after_rename_to_backup -"No crash!" -crash point: ddl_log_alter_after_rename_to_backup_log -"No crash!" 
-crash point: ddl_log_alter_rename_frm -"No crash!" -crash point: ddl_log_alter_after_rename_to_original -"No crash!" -crash point: ddl_log_alter_before_rename_triggers -t2.frm -t2.ibd -"Table is in test2" -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 -crash point: ddl_log_alter_after_rename_triggers -t2.frm -t2.ibd -"Table is in test2" -Table Create Table -t2 CREATE TABLE `t2` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 -crash point: ddl_log_alter_after_delete_backup -"No crash!" -crash point: ddl_log_alter_after_drop_original_table -"No crash!" - -query: ALTER TABLE t1 ADD key(b), COMMENT "new" - -crash point: ddl_log_alter_after_create_frm -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_create_table -"No crash!" -crash point: ddl_log_alter_after_prepare_inplace -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_copy -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_log -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`), - KEY `b` (`b`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" -crash point: ddl_log_alter_after_rename_to_backup -"No crash!" -crash point: ddl_log_alter_after_rename_to_backup_log -"No crash!" -crash point: ddl_log_alter_rename_frm -"No crash!" -crash point: ddl_log_alter_after_rename_to_original -"No crash!" -crash point: ddl_log_alter_before_rename_triggers -"No crash!" -crash point: ddl_log_alter_after_rename_triggers -"No crash!" -crash point: ddl_log_alter_after_delete_backup -"No crash!" -crash point: ddl_log_alter_after_drop_original_table -"No crash!" - -query: ALTER TABLE t1 DROP INDEX a - -crash point: ddl_log_alter_after_create_frm -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_create_table -"No crash!" 
-crash point: ddl_log_alter_after_prepare_inplace -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_copy -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL, - KEY `a` (`a`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -crash point: ddl_log_alter_after_log -t1.frm -t1.ibd -Table Create Table -t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` int(11) DEFAULT NULL -) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -count(*) -2 -master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a -crash point: ddl_log_alter_after_rename_to_backup -"No crash!" -crash point: ddl_log_alter_after_rename_to_backup_log -"No crash!" -crash point: ddl_log_alter_rename_frm -"No crash!" -crash point: ddl_log_alter_after_rename_to_original -"No crash!" -crash point: ddl_log_alter_before_rename_triggers -"No crash!" -crash point: ddl_log_alter_after_rename_triggers -"No crash!" -crash point: ddl_log_alter_after_delete_backup -"No crash!" -crash point: ddl_log_alter_after_drop_original_table -"No crash!" diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table.test mariadb-10.11.13/mysql-test/suite/atomic/alter_table.test --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,198 +0,0 @@ ---source include/have_debug.inc ---source include/have_innodb.inc ---source include/have_log_bin.inc - -if (!$BIG_TEST) -{ - --source include/not_valgrind.inc - --source include/not_msan.inc -} - -# -# Testing of atomic alter table with crashes in a lot of different places -# -# Things tested: -# With MyISAM and InnoDB engines to ensure that we cover both the normal and -# online alter table paths.
-# Alter table with new columns -# Alter table which only touches .frm -# Alter table disable keys (has its own code path) -# Alter table with rename -# Alter table with rename and only options that touch .frm -# Alter table with rename and add new columns -# Alter table with storage engine change (with and without column definition -# changes) -# Alter table with storage engine change and rename -# Alter table to another database - ---disable_query_log -call mtr.add_suppression("InnoDB: .* does not exist in the InnoDB internal"); -# Speed up wait_until_connected_again.inc -let NO_WSREP=1; ---enable_query_log -let $MYSQLD_DATADIR= `SELECT @@datadir`; - -create database test2; -RESET MASTER; - -if ($engine_count == "") -{ - let $engine_count=2; - let $engines='myisam','innodb'; -} -if ($extra_engine == "") -{ - let $extra_engine=aria; -} - -let $crash_count=13; -let $crash_points='ddl_log_alter_after_create_frm', 'ddl_log_alter_after_create_table', 'ddl_log_alter_after_prepare_inplace','ddl_log_alter_after_copy', 'ddl_log_alter_after_log', 'ddl_log_alter_after_rename_to_backup', 'ddl_log_alter_after_rename_to_backup_log', 'ddl_log_alter_rename_frm', 'ddl_log_alter_after_rename_to_original', 'ddl_log_alter_before_rename_triggers', 'ddl_log_alter_after_rename_triggers', 'ddl_log_alter_after_delete_backup', 'ddl_log_alter_after_drop_original_table'; - -let $statement_count=16; -let $statements='ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new"', - 'ALTER TABLE t1 COMMENT "new"', - 'ALTER TABLE t1 change column a c int COMMENT "new"', - 'ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2', - 'ALTER TABLE t1 disable keys', - 'ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new"', - 'ALTER TABLE t1 rename t2', - 'ALTER TABLE t1 COMMENT "new", rename t2', - 'ALTER TABLE t1 change column a c int COMMENT "new", rename t2', - 'ALTER TABLE t1 ENGINE=$extra_engine, COMMENT "new"', - 'ALTER TABLE t1 change column a c int COMMENT "new", engine=$extra_engine', - 'ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=$extra_engine', - 'ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2', - 'ALTER TABLE t1 COMMENT "new", rename test2.t2', - 'ALTER TABLE t1 ADD key(b), COMMENT "new"', - 'ALTER TABLE t1 DROP INDEX a'; - -# If there is a need to test one specific state (crash point and query), -# one can use the comments below to execute one specific test combination -#let $crash_count=1; -#let $crash_points='ddl_log_alter_after_create_frm'; -#let $statement_count= 1; -#let $statements='ALTER TABLE t1 ADD COLUMN c int, COMMENT "new"'; -#let $engine_count=1; -#let $engines='rocksdb'; -#--source include/have_rocksdb.inc - -let $old_debug=`select @@debug_dbug`; -let $e=0; -let $keep_include_silent=1; -let $grep_script=ALTER; ---disable_query_log
$MYSQLTEST_VARDIR/tmp/mysqld.1.expect - } -# The following can be used for testing one specific failure -# if ($crash == "ddl_log_alter_after_log") -# { -# if ($r == 2) -# { -# --remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -# } -# } - --disable_reconnect - --eval set @@debug_dbug="+d,$crash",@debug_crash_counter=1 - let $errno=0; - --error 0,2013 - --eval $statement; - let $error=$errno; - --enable_reconnect - --source include/wait_until_connected_again.inc - --disable_query_log - --eval set @@debug_dbug="$old_debug" - - if ($error == 0) - { - echo "No crash!"; - } - if ($error != 0) - { - --list_files $MYSQLD_DATADIR/test t* - --list_files $MYSQLD_DATADIR/test *sql* - --list_files $MYSQLD_DATADIR/test2 t* - --list_files $MYSQLD_DATADIR/test2 *sql* - # Check which tables still exist - --error 0,1 - --file_exists $MYSQLD_DATADIR/test/t1.frm - let $error2=$errno; - if ($error2 == 0) - { - show create table t1; - select count(*) from t1; - } - if ($error2 == 1) - { - --error 0,1 - --file_exists $MYSQLD_DATADIR/test/t2.frm - let $error3=$errno; - if ($error3 == 0) - { - show create table t2; - select count(*) from t2; - } - if ($error3 == 1) - { - --echo "Table is in test2" - show create table test2.t2; - select count(*) from test2.t2; - } - } - --let $binlog_file=$start_binlog_file - --let $binlog_output_name=master-bin.000001 - - --source include/show_binlog_events.inc - if ($error) - { - --let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) - --let $binlog_output_name=master-bin.000002 - if ($binlog_file != $start_binlog_file) - { - --source include/show_binlog_events.inc - } - } - } - --disable_warnings - drop table if exists t1,t2; - drop table if exists test2.t2; - --enable_warnings - } - } -} -drop database test2; ---enable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table_aria.test mariadb-10.11.13/mysql-test/suite/atomic/alter_table_aria.test --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table_aria.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_aria.test 2025-05-19 16:14:24.000000000 +0000 @@ -4,4 +4,4 @@ let $engine_count=1; let $engines='aria'; let $extra_engine=myisam; ---source alter_table.test +--source alter_table.inc diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table_innodb.opt mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.opt --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table_innodb.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.opt 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1 @@ +--innodb-max-dirty-pages-pct=0 diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table_innodb.result mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.result --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table_innodb.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,1396 @@ +create database test2; +RESET MASTER; + +engine: innodb + + +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!"
+crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 change column a c int COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" 
+crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" 
+crash point: ddl_log_alter_before_rename_triggers +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_triggers +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 disable keys + +crash point: ddl_log_alter_after_create_frm +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_create_table +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_copy +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_log +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_rename_frm +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_delete_backup +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +Warnings: +Note 1031 Storage engine InnoDB of the table `test`.`t1` doesn't have this option +"No crash!" 
+ +query: ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" 
+crash point: ddl_log_alter_after_delete_backup +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_after_drop_original_table +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" + +query: ALTER TABLE t1 rename t2 + +crash point: ddl_log_alter_after_create_frm +"No crash!" +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +"No crash!" +crash point: ddl_log_alter_after_log +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_rename_triggers +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 COMMENT "new", rename t2 + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" 
+crash point: ddl_log_alter_before_rename_triggers +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_triggers +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 change column a c int COMMENT "new", rename t2 + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_triggers +t2.frm +t2.ibd +Table Create Table +t2 CREATE TABLE `t2` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", rename t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" 
+ +query: ALTER TABLE t1 ENGINE=aria, COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" 
+crash point: ddl_log_alter_after_delete_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_after_drop_original_table +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" + +query: ALTER TABLE t1 change column a c int COMMENT "new", engine=aria + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_after_rename_to_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" 
+crash point: ddl_log_alter_after_delete_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_after_drop_original_table +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria + +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_rename_to_backup +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_rename_to_backup_log +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_rename_frm +"No crash!" 
+crash point: ddl_log_alter_after_rename_to_original +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_before_rename_triggers +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_rename_triggers +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_delete_backup +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_drop_original_table +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria + +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" 
+crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t2.frm +t2.ibd +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_triggers +t2.frm +t2.ibd +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 COMMENT "new", rename test2.t2 + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" 
+crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t2.frm +t2.ibd +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_triggers +t2.frm +t2.ibd +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 ADD key(b), COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 DROP INDEX a + +crash point: ddl_log_alter_after_create_frm +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" 
+crash point: ddl_log_alter_after_prepare_inplace +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.frm +t1.ibd +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table_innodb.test mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.test --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table_innodb.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_innodb.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,7 @@ +# +# Test atomic alter table with InnoDB + +--source include/have_innodb.inc +let $engine_count=1; +let $engines='innodb'; +--source alter_table.inc diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table_myisam.result mariadb-10.11.13/mysql-test/suite/atomic/alter_table_myisam.result --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table_myisam.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_myisam.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,1741 @@ +create database test2; +RESET MASTER; + +engine: myisam + + +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" 
+crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" +crash point: ddl_log_alter_after_drop_original_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new" + +query: ALTER TABLE t1 COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" 
+crash point: ddl_log_alter_after_prepare_inplace +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new" +crash point: ddl_log_alter_after_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 change column a c int COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new" +crash point: ddl_log_alter_after_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" 
+ +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_to_backup +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_to_backup_log +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_rename_frm +"No crash!" 
+crash point: ddl_log_alter_after_rename_to_original +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_before_rename_triggers +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_triggers +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_delete_backup +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 +crash point: ddl_log_alter_after_drop_original_table +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2 + +query: ALTER TABLE t1 disable keys + +crash point: ddl_log_alter_after_create_frm +"No crash!" +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +"No crash!" +crash point: ddl_log_alter_after_log +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" 
+ +query: ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" 
+crash point: ddl_log_alter_after_delete_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" +crash point: ddl_log_alter_after_drop_original_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, ALGORITHM=copy, COMMENT "new" + +query: ALTER TABLE t1 rename t2 + +crash point: ddl_log_alter_after_create_frm +"No crash!" +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +"No crash!" +crash point: ddl_log_alter_after_log +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_rename_triggers +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 COMMENT "new", rename t2 + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename t2 +crash point: ddl_log_alter_after_log +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" 
+crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_triggers +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 change column a c int COMMENT "new", rename t2 + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", rename t2 +crash point: ddl_log_alter_after_log +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", rename t2 +crash point: ddl_log_alter_after_rename_triggers +t2.MYD +t2.MYI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", rename t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" 
+crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 ENGINE=aria, COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" 
+crash point: ddl_log_alter_after_delete_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" +crash point: ddl_log_alter_after_drop_original_table +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ENGINE=aria, COMMENT "new" + +query: ALTER TABLE t1 change column a c int COMMENT "new", engine=aria + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_after_rename_to_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" 
+crash point: ddl_log_alter_after_delete_backup +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria +crash point: ddl_log_alter_after_drop_original_table +t1.MAD +t1.MAI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `c` int(11) DEFAULT NULL COMMENT 'new', + `b` int(11) DEFAULT NULL, + KEY `a` (`c`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 change column a c int COMMENT "new", engine=aria + +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_rename_to_backup +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_rename_to_backup_log +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_rename_frm +"No crash!" 
+crash point: ddl_log_alter_after_rename_to_original +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_before_rename_triggers +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_rename_triggers +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_delete_backup +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria +crash point: ddl_log_alter_after_drop_original_table +t2.MAD +t2.MAI +t2.frm +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1 COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename t2, engine=aria + +query: ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" 
+crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_to_backup +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_to_backup_log +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_before_rename_triggers +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_triggers +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_delete_backup +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_drop_original_table +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + 
`b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD COLUMN c INT, COMMENT "new", rename test2.t2 + +query: ALTER TABLE t1 COMMENT "new", rename test2.t2 + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +"No crash!" +crash point: ddl_log_alter_after_prepare_inplace +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_to_backup +"No crash!" +crash point: ddl_log_alter_after_rename_to_backup_log +"No crash!" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +"No crash!" +crash point: ddl_log_alter_before_rename_triggers +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_rename_triggers +t2.MYD +t2.MYI +t2.frm +"Table is in test2" +Table Create Table +t2 CREATE TABLE `t2` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 COMMENT "new", rename test2.t2 +crash point: ddl_log_alter_after_delete_backup +"No crash!" +crash point: ddl_log_alter_after_drop_original_table +"No crash!" + +query: ALTER TABLE t1 ADD key(b), COMMENT "new" + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" 
+crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" +crash point: ddl_log_alter_after_drop_original_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci COMMENT='new' +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 ADD key(b), COMMENT "new" + +query: ALTER TABLE t1 DROP INDEX a + +crash point: ddl_log_alter_after_create_frm +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_create_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_prepare_inplace +"No crash!" 
+crash point: ddl_log_alter_after_copy +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +crash point: ddl_log_alter_after_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a +crash point: ddl_log_alter_after_rename_to_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a +crash point: ddl_log_alter_after_rename_to_backup_log +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a +crash point: ddl_log_alter_rename_frm +"No crash!" +crash point: ddl_log_alter_after_rename_to_original +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a +crash point: ddl_log_alter_before_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_rename_triggers +"No crash!" +crash point: ddl_log_alter_after_delete_backup +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a +crash point: ddl_log_alter_after_drop_original_table +t1.MYD +t1.MYI +t1.frm +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +count(*) +2 +master-bin.000002 # Query # # use `test`; ALTER TABLE t1 DROP INDEX a diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table_myisam.test mariadb-10.11.13/mysql-test/suite/atomic/alter_table_myisam.test --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table_myisam.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_myisam.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,6 @@ +# +# Test atomic alter table with MyISAM + +let $engine_count=1; +let $engines='myisam'; +--source alter_table.inc diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/alter_table_rocksdb.test mariadb-10.11.13/mysql-test/suite/atomic/alter_table_rocksdb.test --- mariadb-10.11.11/mysql-test/suite/atomic/alter_table_rocksdb.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_rocksdb.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,4 +3,4 @@ let $engine_count=1; let $engines='rocksdb'; set global rocksdb_flush_log_at_trx_commit=1; ---source alter_table.test +--source alter_table.inc
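The .test hunks above complete a small restructuring of the atomic suite: the shared crash-point driver is renamed from alter_table.test to alter_table.inc, and each engine is exercised through a thin wrapper that only declares the engine list before sourcing the driver. As a sketch of that pattern (an Aria wrapper is used here purely for illustration; its exact header and guard line are assumed, not taken from this diff):

#
# Test atomic alter table with Aria
#
--source include/have_aria.inc
# parameters read by the shared driver
let $engine_count=1;
let $engines='aria';
# run the crash-point driver once for each listed engine
--source alter_table.inc

For every engine the driver simulates a server crash at each named DDL-log point (ddl_log_alter_after_create_frm, ddl_log_alter_after_copy, and so on) and, after restart, records the surviving files, the SHOW CREATE TABLE output, the row count and the binlog entry; those records are exactly what the .result hunks in this diff contain, with "No crash!" marking points the given statement never reaches.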
mariadb-10.11.11/mysql-test/suite/atomic/alter_table_trigger.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/alter_table_trigger.test 2025-05-19 16:14:24.000000000 +0000 @@ -7,7 +7,7 @@ # # Testing of atomic create table with crashes in a lot of different places # -# This is very similar to the alter_table.test, but includes testing of +# This is very similar to the alter_table.inc, but includes testing of # triggers in conjunction with ALTER TABLE .. RENAME. # diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/create_table.test mariadb-10.11.13/mysql-test/suite/atomic/create_table.test --- mariadb-10.11.11/mysql-test/suite/atomic/create_table.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/create_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_debug.inc --source include/have_sequence.inc --source include/have_innodb.inc diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/drop_table.test mariadb-10.11.13/mysql-test/suite/atomic/drop_table.test --- mariadb-10.11.11/mysql-test/suite/atomic/drop_table.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/drop_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_debug.inc --source include/have_innodb.inc --source include/have_csv.inc diff -Nru mariadb-10.11.11/mysql-test/suite/atomic/rename_table.test mariadb-10.11.13/mysql-test/suite/atomic/rename_table.test --- mariadb-10.11.11/mysql-test/suite/atomic/rename_table.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/atomic/rename_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_debug.inc --source include/have_innodb.inc --source include/have_csv.inc diff -Nru mariadb-10.11.11/mysql-test/suite/binlog/r/binlog_commit_fail.result mariadb-10.11.13/mysql-test/suite/binlog/r/binlog_commit_fail.result --- mariadb-10.11.11/mysql-test/suite/binlog/r/binlog_commit_fail.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/binlog/r/binlog_commit_fail.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,116 @@ +set @@session.gtid_domain_id=1; +set @save_gtid_stric_mode=@@global.gtid_strict_mode; +create table ta (a int) engine=aria; +create table ti (a int) engine=innodb; +create table ti_pk (a int primary key) engine=innodb; +create table t (a int) engine=innodb; +create function f_i() +returns integer +begin +insert into ti set a=1; +return 1; +end | +create function f_ia(arg int) +returns integer +begin +insert into ti_pk set a=1; +insert into ta set a=1; +insert into ti_pk set a=arg; +return 1; +end | +call mtr.add_suppression("Error writing file"); +select count(*) as zero from t; +zero +0 +select count(*) as zero from ta; +zero +0 +select count(*) as zero from ti; +zero +0 +# 1. simple InnoDB test +set @@global.gtid_strict_mode=0; +set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +insert into t set a=1; +ERROR HY000: An attempt was made to binlog GTID VALUE which would create an out-of-order sequence number with existing GTID VALUE, and gtid strict mode is enabled +# observe effective rollback +select count(*) as zero from t; +zero +0 +# 2.
simple Aria test +set @@global.gtid_strict_mode=0; +set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +insert into ta values (1),(2); +ERROR HY000: An attempt was made to binlog GTID VALUE which would create an out-of-order sequence number with existing GTID VALUE, and gtid strict mode is enabled +# note no rollback +select count(*) as '*NON-zero*' from ta; +*NON-zero* +2 +delete from ta; +# 3. multi-engine test +set @@global.gtid_strict_mode=0; +set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +insert into ta set a=f_i(); +ERROR HY000: An attempt was made to binlog GTID VALUE which would create an out-of-order sequence number with existing GTID VALUE, and gtid strict mode is enabled +# note no rollback.. +select count(*) as one from ta; +one +1 +# ..except transactional engine +select count(*) as zero from ti; +zero +0 +delete from ta; +set @@global.gtid_strict_mode=0; +set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +insert into t set a=f_ia(0); +ERROR HY000: An attempt was made to binlog GTID VALUE which would create an out-of-order sequence number with existing GTID VALUE, and gtid strict mode is enabled +# note no rollback.. +select count(*) as one from ta; +one +1 +# ..except transactional engine +select count(*) as zero from t; +zero +0 +select count(*) as zero from ti_pk; +zero +0 +delete from ta; +# 4. create-table-select-f() +set @@global.gtid_strict_mode=0; +set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +create table f_x (a int) select f_i() as a; +ERROR HY000: An attempt was made to binlog GTID VALUE which would create an out-of-order sequence number with existing GTID VALUE, and gtid strict mode is enabled +# rollback indeed takes place in the pure transactional case +select count(*) as zero from ti; +zero +0 +set @@global.gtid_strict_mode=0; +set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +create table t_x (a int) engine=aria select f_ia(0) as a; +ERROR HY000: An attempt was made to binlog GTID VALUE which would create an out-of-order sequence number with existing GTID VALUE, and gtid strict mode is enabled +select * from t_x; +ERROR 42S02: Table 'test.t_x' doesn't exist +# **TODO**: fix MDEV-36027 +# **TODO**: the empty binlog is buggy .. +include/show_binlog_events.inc +# .. as non-transactional `ta` (and `t_x` sic!) are modified +select count(*) as one from ta; +one +1 +select count(*) as zero from ti; +zero +0 +delete from ta; +#.
+set @@global.gtid_strict_mode=@save_gtid_stric_mode; +drop function f_i; +drop function f_ia; +drop table t, ta, ti, ti_pk; diff -Nru mariadb-10.11.11/mysql-test/suite/binlog/r/binlog_mysqlbinlog_warn_stop_position.result mariadb-10.11.13/mysql-test/suite/binlog/r/binlog_mysqlbinlog_warn_stop_position.result --- mariadb-10.11.11/mysql-test/suite/binlog/r/binlog_mysqlbinlog_warn_stop_position.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/binlog/r/binlog_mysqlbinlog_warn_stop_position.result 2025-05-19 16:14:24.000000000 +0000 @@ -18,6 +18,51 @@ # Ensuring file offset of binlog_f2_mid < binlog_f1_end # # +# Test using --read-from-remote-server +# +connection default; +# +# --stop-position tests +# +# Case 1.a) With one binlog file, a --stop-position before the end of +# the file should not result in a warning +# MYSQL_BINLOG --read-from-remote-server --stop-position=binlog_f1_pre_rotate binlog_f1_full --result-file=tmp/warn_position_test_file.out 2>&1 +# +# Case 1.b) With one binlog file, a --stop-position at the exact end of +# the file should not result in a warning +# MYSQL_BINLOG --read-from-remote-server --stop-position=binlog_f1_end binlog_f1_full --result-file=tmp/warn_position_test_file.out 2>&1 +# +# Case 1.c) With one binlog file, a --stop-position past the end of the +# file should(!) result in a warning +# MYSQL_BINLOG --read-from-remote-server --short-form --stop-position=binlog_f1_over_eof binlog_f1_full --result-file=tmp/warn_position_test_file.out 2>&1 +WARNING: Did not reach stop position before end of input +# +# Case 2.a) With two binlog files, a --stop-position targeting b2 which +# exists in the size of b1 should: +# 1) not provide any warnings +# 2) not prevent b2 from outputting its desired events before the +# stop position +# MYSQL_BINLOG --read-from-remote-server --stop-position=binlog_f2_mid binlog_f1_full binlog_f2_full --result-file=tmp/warn_position_test_file.out 2>&1 +include/assert_grep.inc [Ensure all intended GTIDs are present] +include/assert_grep.inc [Ensure the next GTID binlogged is _not_ present] +# +# Case 2.b) With two binlog files, a --stop-position targeting the end +# of binlog 2 should: +# 1) not provide any warnings +# 2) not prevent b2 from outputting its entire binary log +# MYSQL_BINLOG --read-from-remote-server --stop-position=binlog_f2_end binlog_f1_full binlog_f2_full --result-file=tmp/warn_position_test_file.out 2>&1 +include/assert_grep.inc [Ensure a GTID exists for each transaction] +include/assert_grep.inc [Ensure the last GTID binlogged is present] +# +# Case 2.c) With two binlog files, a --stop-position targeting beyond +# the eof of binlog 2 should: +# 1) provide a warning that the stop position was not reached +# 2) not prevent b2 from outputting its entire binary log +# MYSQL_BINLOG --read-from-remote-server --stop-position=binlog_f2_over_eof binlog_f1_full binlog_f2_full --result-file=tmp/warn_position_test_file.out 2>&1 +WARNING: Did not reach stop position before end of input +include/assert_grep.inc [Ensure a GTID exists for each transaction] +# +# # Test using local binlog files # connection default; diff -Nru mariadb-10.11.11/mysql-test/suite/binlog/t/binlog_commit_fail.test mariadb-10.11.13/mysql-test/suite/binlog/t/binlog_commit_fail.test --- mariadb-10.11.11/mysql-test/suite/binlog/t/binlog_commit_fail.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/binlog/t/binlog_commit_fail.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,135 @@ +# Tests of commit 
time failures. +# At commit time of an auto-commit statement, a failure to commit in its +# binlog branch should roll back at least the transactional part of the statement. +# +# References: +# MDEV-35506 commit policy of one-phase-commit even at errored-out binlogging leads to assert +# MDEV-36027 Errored-out CREATE-SELECT does not binlog results of non-transactional table modification + +source include/have_innodb.inc; +source include/have_binlog_format_row.inc; + +set @@session.gtid_domain_id=1; +set @save_gtid_stric_mode=@@global.gtid_strict_mode; + +create table ta (a int) engine=aria; +create table ti (a int) engine=innodb; +create table ti_pk (a int primary key) engine=innodb; +create table t (a int) engine=innodb; +delimiter |; +create function f_i() +returns integer +begin + insert into ti set a=1; +return 1; +end | +create function f_ia(arg int) +returns integer +begin + insert into ti_pk set a=1; + insert into ta set a=1; + insert into ti_pk set a=arg; + return 1; +end | +delimiter ;| + +call mtr.add_suppression("Error writing file"); + +# Naturally all empty now +select count(*) as zero from t; +select count(*) as zero from ta; +select count(*) as zero from ti; + +# Force a manual value assignment to gtid::seq_no while in strict mode +# so that the value is rejected. Despite the errored-out statement +# being at its commit phase, it will eventually be rolled back. +# Side effects of non-transactional engines, like Aria, are displayed. +--echo # 1. simple InnoDB test +set @@global.gtid_strict_mode=0; set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +# mask possible allowed seq_no shift +--replace_regex /GTID 1-1-[0-9]+/GTID VALUE/ +--error ER_GTID_STRICT_OUT_OF_ORDER +insert into t set a=1; + +--echo # observe effective rollback +select count(*) as zero from t; + +--echo # 2. simple Aria test +set @@global.gtid_strict_mode=0; set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +--replace_regex /GTID 1-1-[0-9]+/GTID VALUE/ +--error ER_GTID_STRICT_OUT_OF_ORDER +insert into ta values (1),(2); + +--echo # note no rollback +select count(*) as '*NON-zero*' from ta; +# local cleanup +delete from ta; + +--echo # 3. multi-engine test +# A. non-transactional top-level +set @@global.gtid_strict_mode=0; set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +--replace_regex /GTID 1-1-[0-9]+/GTID VALUE/ +--error ER_GTID_STRICT_OUT_OF_ORDER +insert into ta set a=f_i(); +--echo # note no rollback.. +select count(*) as one from ta; +--echo # ..except transactional engine +select count(*) as zero from ti; +delete from ta; + +# B. non-transactional in the leaf +set @@global.gtid_strict_mode=0; set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +--replace_regex /GTID 1-1-[0-9]+/GTID VALUE/ +--error ER_GTID_STRICT_OUT_OF_ORDER +insert into t set a=f_ia(0); + +--echo # note no rollback.. +select count(*) as one from ta; +--echo # ..except transactional engine +select count(*) as zero from t; +select count(*) as zero from ti_pk; +delete from ta; + +--echo # 4. create-table-select-f() +--let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) +# A.
two phase commit branch +set @@global.gtid_strict_mode=0; set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +--replace_regex /GTID 1-1-[0-9]+/GTID VALUE/ +--error ER_GTID_STRICT_OUT_OF_ORDER +create table f_x (a int) select f_i() as a; +--echo # rollback indeed takes place in the pure transactional case +select count(*) as zero from ti; + +# B. one phase commit branch +--let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) +set @@global.gtid_strict_mode=0; set @@session.gtid_seq_no=1; +set @@global.gtid_strict_mode=1; +--replace_regex /GTID 1-1-[0-9]+/GTID VALUE/ +--error ER_GTID_STRICT_OUT_OF_ORDER +create table t_x (a int) engine=aria select f_ia(0) as a; +--error ER_NO_SUCH_TABLE +select * from t_x; + +--echo # **TODO**: fix MDEV-36027 +--echo # **TODO**: the empty binlog is buggy .. +--source include/show_binlog_events.inc +--echo # .. as non-transactional `ta` (and `t_x` sic!) are modified +select count(*) as one from ta; +select count(*) as zero from ti; + +delete from ta; +--echo #. + +# cleanup + +set @@global.gtid_strict_mode=@save_gtid_stric_mode; +drop function f_i; +drop function f_ia; +drop table t, ta, ti, ti_pk; diff -Nru mariadb-10.11.11/mysql-test/suite/binlog/t/binlog_mysqlbinlog_warn_stop_position.test mariadb-10.11.13/mysql-test/suite/binlog/t/binlog_mysqlbinlog_warn_stop_position.test --- mariadb-10.11.11/mysql-test/suite/binlog/t/binlog_mysqlbinlog_warn_stop_position.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/binlog/t/binlog_mysqlbinlog_warn_stop_position.test 2025-05-19 16:14:24.000000000 +0000 @@ -64,13 +64,12 @@ --die Mid point chosen to end in binlog 2 does not exist in earlier binlog } -#--echo # -#--echo # -#--echo # Test using --read-from-remote-server -#--echo # -#--let $read_from_remote_server= 1 -#--emit warning is not supported by --read-from-remote-server now -#--source binlog_mysqlbinlog_warn_stop_position.inc +--echo # +--echo # +--echo # Test using --read-from-remote-server +--echo # +--let $read_from_remote_server= 1 +--source binlog_mysqlbinlog_warn_stop_position.inc --echo # --echo # diff -Nru mariadb-10.11.11/mysql-test/suite/binlog_encryption/encrypted_master.test mariadb-10.11.13/mysql-test/suite/binlog_encryption/encrypted_master.test --- mariadb-10.11.11/mysql-test/suite/binlog_encryption/encrypted_master.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/binlog_encryption/encrypted_master.test 2025-05-19 16:14:24.000000000 +0000 @@ -18,6 +18,7 @@ # - with annotated events, default checksums and minimal binlog row image # +--source include/long_test.inc # The test can take very long time with valgrind --source include/not_valgrind.inc diff -Nru mariadb-10.11.11/mysql-test/suite/binlog_encryption/rpl_parallel_innodb_lock_conflict.result mariadb-10.11.13/mysql-test/suite/binlog_encryption/rpl_parallel_innodb_lock_conflict.result --- mariadb-10.11.11/mysql-test/suite/binlog_encryption/rpl_parallel_innodb_lock_conflict.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/binlog_encryption/rpl_parallel_innodb_lock_conflict.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,16 +1,15 @@ ***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts *** include/master-slave.inc [connection master] -connection server_2; -SET sql_log_bin=0; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CALL mtr.add_suppression("InnoDB: Transaction was aborted due to "); CALL 
mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); -SET sql_log_bin=1; +connection server_2; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; include/stop_slave.inc SET GLOBAL slave_parallel_threads=10; CHANGE MASTER TO master_use_gtid=slave_pos; connection server_1; -ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); connect con1,127.0.0.1,root,,test,$SERVER_MYPORT_1,; diff -Nru mariadb-10.11.11/mysql-test/suite/encryption/r/doublewrite_debug.result mariadb-10.11.13/mysql-test/suite/encryption/r/doublewrite_debug.result --- mariadb-10.11.11/mysql-test/suite/encryption/r/doublewrite_debug.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/encryption/r/doublewrite_debug.result 2025-05-19 16:14:24.000000000 +0000 @@ -3,8 +3,9 @@ call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); -create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0; -create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0; +create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0; +create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0; +create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0; start transaction; insert into t1 values(1, repeat('#',12)); insert into t1 values(2, repeat('+',12)); @@ -12,29 +13,37 @@ insert into t1 values(4, repeat('-',12)); insert into t1 values(5, repeat('.',12)); insert into t2 select * from t1; +insert into t3 select * from t1; commit work; SET GLOBAL innodb_fast_shutdown = 0; # restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0 select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1'; select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2'; +select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3'; begin; insert into t1 values (6, repeat('%', 400)); insert into t2 values (6, repeat('%', 400)); +insert into t3 values (6, repeat('%', 400)); # xtrabackup prepare set global innodb_saved_page_number_debug = 3; set global innodb_fil_make_page_dirty_debug = @t1_space_id; set global innodb_saved_page_number_debug = 3; set global innodb_fil_make_page_dirty_debug = @t2_space_id; +set global innodb_saved_page_number_debug = 3; +set global innodb_fil_make_page_dirty_debug = @t3_space_id; set global innodb_buf_flush_list_now = 1; # Kill the server # restart -FOUND 2 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err +FOUND 3 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err check table t1; Table Op Msg_type Msg_text test.t1 check status OK check table t2; Table Op Msg_type Msg_text test.t2 check status OK +check table t3; +Table Op Msg_type Msg_text +test.t3 check status OK select f1, f2 from t1; f1 f2 1 ############ @@ -49,6 +58,13 @@ 3 //////////// 4 ------------ 5 ............ +select f1, f2 from t3; +f1 f2 +1 ############ +2 ++++++++++++ +3 //////////// +4 ------------ +5 ............ 
SET GLOBAL innodb_fast_shutdown = 0; # shutdown server # remove datadir @@ -78,4 +94,4 @@ 3 //////////// 4 ------------ 5 ............ -drop table t2, t1; +drop table t3, t2, t1; diff -Nru mariadb-10.11.11/mysql-test/suite/encryption/t/doublewrite_debug.opt mariadb-10.11.13/mysql-test/suite/encryption/t/doublewrite_debug.opt --- mariadb-10.11.11/mysql-test/suite/encryption/t/doublewrite_debug.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/encryption/t/doublewrite_debug.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,3 +1,3 @@ --innodb-use-atomic-writes=0 ---innodb-encrypt-tables=FORCE +--innodb-encrypt-tables=on --innodb_sys_tablespaces diff -Nru mariadb-10.11.11/mysql-test/suite/encryption/t/doublewrite_debug.test mariadb-10.11.13/mysql-test/suite/encryption/t/doublewrite_debug.test --- mariadb-10.11.11/mysql-test/suite/encryption/t/doublewrite_debug.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/encryption/t/doublewrite_debug.test 2025-05-19 16:14:24.000000000 +0000 @@ -12,8 +12,9 @@ let MYSQLD_DATADIR=`select @@datadir`; let ALGO=`select @@innodb_checksum_algorithm`; -create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0; -create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0; +create table t1 (f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=yes stats_persistent=0; +create table t2(f1 int primary key, f2 blob)engine=innodb encrypted=yes stats_persistent=0; +create table t3(f1 int primary key, f2 blob)page_compressed=1 engine=innodb encrypted=no stats_persistent=0; start transaction; insert into t1 values(1, repeat('#',12)); @@ -22,6 +23,7 @@ insert into t1 values(4, repeat('-',12)); insert into t1 values(5, repeat('.',12)); insert into t2 select * from t1; +insert into t3 select * from t1; commit work; # Slow shutdown and restart to make sure ibuf merge is finished @@ -33,15 +35,17 @@ select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1'; select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2'; +select space into @t3_space_id from information_schema.innodb_sys_tablespaces where name='test/t3'; begin; insert into t1 values (6, repeat('%', 400)); insert into t2 values (6, repeat('%', 400)); +insert into t3 values (6, repeat('%', 400)); -# Copy the t1.ibd, t2.ibd file +# Copy the t1.ibd, t2.ibd, t3.ibd file let $targetdir=$MYSQLTEST_VARDIR/tmp/backup_1; --disable_result_log -exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir; +exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --skip-innodb-log-checkpoint-now --target-dir=$targetdir; --enable_result_log echo # xtrabackup prepare; @@ -54,8 +58,11 @@ set global innodb_saved_page_number_debug = 3; set global innodb_fil_make_page_dirty_debug = @t2_space_id; +set global innodb_saved_page_number_debug = 3; +set global innodb_fil_make_page_dirty_debug = @t3_space_id; + set global innodb_buf_flush_list_now = 1; ---let CLEANUP_IF_CHECKPOINT=drop table t1, t2, unexpected_checkpoint; +--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, t3, unexpected_checkpoint; --source ../../suite/innodb/include/no_checkpoint_end.inc # Corrupt the page 3 in t1.ibd, t2.ibd file perl; @@ -103,6 +110,15 @@ sysseek(FILE, 3*$page_size, 0); print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'}); close FILE; + +# Zero the complete page +my $fname= "$ENV{'MYSQLD_DATADIR'}test/t3.ibd"; +open(FILE, 
"+<", $fname) or die; +FILE->autoflush(1); +binmode FILE; +sysseek(FILE, 3*$page_size, 0); +print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'}); +close FILE; EOF # Successful recover from doublewrite buffer @@ -114,8 +130,10 @@ check table t1; check table t2; +check table t3; select f1, f2 from t1; select f1, f2 from t2; +select f1, f2 from t3; SET GLOBAL innodb_fast_shutdown = 0; let $shutdown_timeout=; @@ -220,4 +238,4 @@ --source ../../mariabackup/include/restart_and_restore.inc select * from t1; -drop table t2, t1; +drop table t3, t2, t1; diff -Nru mariadb-10.11.11/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt mariadb-10.11.13/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt --- mariadb-10.11.11/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,2 +1,2 @@ ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M --innodb_encrypt_temporary_tables=1 diff -Nru mariadb-10.11.11/mysql-test/suite/engines/iuds/r/insert_time.result mariadb-10.11.13/mysql-test/suite/engines/iuds/r/insert_time.result --- mariadb-10.11.11/mysql-test/suite/engines/iuds/r/insert_time.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/engines/iuds/r/insert_time.result 2025-05-19 16:14:24.000000000 +0000 @@ -5073,10 +5073,14 @@ INSERT INTO t3(c1,c2) VALUES('34 9:23','34 9:23') /* throws error as row exists with c1='34 9:23',c2='34 9:23' */; ERROR 23000: Duplicate entry '825:23:00-825:23:00' for key 'idx' INSERT IGNORE INTO t1(c1,c2) VALUES('10:22:33','10:22:34') /* doesn't throw error */; +Warnings: +Warning 1062 Duplicate entry '10:22:33' for key 'PRIMARY' INSERT IGNORE INTO t2(c1,c2) VALUES('12:34:56.78','12:34:56.78') /*doesn't throw error */; Warnings: Warning 1062 Duplicate entry '12:34:56-12:34:56' for key 'PRIMARY' INSERT IGNORE INTO t1(c1,c2) VALUES('10:22:34','34 9:23') /*doesn't throw error */; +Warnings: +Warning 1062 Duplicate entry '825:23:00' for key 'c2' INSERT IGNORE INTO t3(c1,c2) VALUES('34 9:23','34 9:23') /*doesn't throw error */; Warnings: Warning 1062 Duplicate entry '825:23:00-825:23:00' for key 'idx' diff -Nru mariadb-10.11.11/mysql-test/suite/federated/federatedx.result mariadb-10.11.13/mysql-test/suite/federated/federatedx.result --- mariadb-10.11.11/mysql-test/suite/federated/federatedx.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/federated/federatedx.result 2025-05-19 16:14:24.000000000 +0000 @@ -79,7 +79,7 @@ `name` varchar(32) NOT NULL default '' ) ENGINE="FEDERATED" DEFAULT CHARSET=latin1 -CONNECTION='mysql://root@127.0.0.1:SLAVE_PORT/federated/t1'; +CONNECTION='mariadb://root@127.0.0.1:SLAVE_PORT/federated/t1'; INSERT INTO federated.t1 (id, name) VALUES (1, 'foo'); INSERT INTO federated.t1 (id, name) VALUES (2, 'fee'); INSERT INTO federated.t1 (id, `group`) VALUES (3, 42); diff -Nru mariadb-10.11.11/mysql-test/suite/federated/federatedx.test mariadb-10.11.13/mysql-test/suite/federated/federatedx.test --- mariadb-10.11.11/mysql-test/suite/federated/federatedx.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/federated/federatedx.test 2025-05-19 16:14:24.000000000 +0000 @@ -92,7 +92,7 @@ `name` varchar(32) NOT NULL default '' ) ENGINE="FEDERATED" DEFAULT CHARSET=latin1 - CONNECTION='mysql://root@127.0.0.1:$SLAVE_MYPORT/federated/t1'; + 
CONNECTION='mariadb://root@127.0.0.1:$SLAVE_MYPORT/federated/t1'; INSERT INTO federated.t1 (id, name) VALUES (1, 'foo'); INSERT INTO federated.t1 (id, name) VALUES (2, 'fee'); diff -Nru mariadb-10.11.11/mysql-test/suite/federated/federatedx_create_handlers.result mariadb-10.11.13/mysql-test/suite/federated/federatedx_create_handlers.result --- mariadb-10.11.11/mysql-test/suite/federated/federatedx_create_handlers.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/federated/federatedx_create_handlers.result 2025-05-19 16:14:24.000000000 +0000 @@ -479,12 +479,12 @@ INSERT INTO federated.t3 VALUES (1),(2),(3); CREATE TABLE federated.t4 (a INT); connection master; -CREATE SERVER fedlink FOREIGN DATA WRAPPER mysql +CREATE SERVER fedlink FOREIGN DATA WRAPPER mariadb OPTIONS (USER 'root', HOST '127.0.0.1', DATABASE 'federated', PORT SLAVE_PORT); CREATE TABLE federated.t3 (a INT) ENGINE=FEDERATED -CONNECTION='mysql://root@127.0.0.1:$SLAVE_MYPORT/federated/t3' +CONNECTION='mariadb://root@127.0.0.1:$SLAVE_MYPORT/federated/t3' PARTITION BY list (a) (PARTITION p1 VALUES IN (1) CONNECTION='fedlink/t3', PARTITION p2 VALUES IN (2) CONNECTION='fedlink/t4'); diff -Nru mariadb-10.11.11/mysql-test/suite/federated/federatedx_create_handlers.test mariadb-10.11.13/mysql-test/suite/federated/federatedx_create_handlers.test --- mariadb-10.11.11/mysql-test/suite/federated/federatedx_create_handlers.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/federated/federatedx_create_handlers.test 2025-05-19 16:14:24.000000000 +0000 @@ -7,9 +7,6 @@ set global federated_pushdown=1; -#Enable after fix MDEV-31846 or in v. 10.5 and later ---disable_cursor_protocol - connection slave; DROP TABLE IF EXISTS federated.t1; @@ -168,11 +165,13 @@ --sorted_result select * from federated.t4; +--disable_cursor_protocol select name into @var from federated.t1 where id=3 limit 1 ; select @var; --disable_ps2_protocol select name into outfile 'tmp.txt' from federated.t1; --enable_ps2_protocol +--enable_cursor_protocol let $path=`select concat(@@datadir, 'test/tmp.txt')`; remove_file $path; @@ -307,13 +306,13 @@ connection master; --replace_result $SLAVE_MYPORT SLAVE_PORT -eval CREATE SERVER fedlink FOREIGN DATA WRAPPER mysql +eval CREATE SERVER fedlink FOREIGN DATA WRAPPER mariadb OPTIONS (USER 'root', HOST '127.0.0.1', DATABASE 'federated', PORT $SLAVE_MYPORT); CREATE TABLE federated.t3 (a INT) ENGINE=FEDERATED - CONNECTION='mysql://root@127.0.0.1:$SLAVE_MYPORT/federated/t3' + CONNECTION='mariadb://root@127.0.0.1:$SLAVE_MYPORT/federated/t3' PARTITION BY list (a) (PARTITION p1 VALUES IN (1) CONNECTION='fedlink/t3', PARTITION p2 VALUES IN (2) CONNECTION='fedlink/t4'); @@ -439,7 +438,5 @@ set global federated_pushdown=0; ---enable_cursor_protocol - source include/federated_cleanup.inc; diff -Nru mariadb-10.11.11/mysql-test/suite/funcs_2/t/innodb_charset.test mariadb-10.11.13/mysql-test/suite/funcs_2/t/innodb_charset.test --- mariadb-10.11.11/mysql-test/suite/funcs_2/t/innodb_charset.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/funcs_2/t/innodb_charset.test 2025-05-19 16:14:24.000000000 +0000 @@ -6,7 +6,7 @@ # Checking of other prerequisites is in charset_master.test # ################################################################################ ---source include/no_valgrind_without_big.inc +--source include/long_test.inc --source include/have_innodb.inc # Starting with MariaDB 10.6, ensure that DDL recovery will have completed diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/disabled.def mariadb-10.11.13/mysql-test/suite/galera/disabled.def --- mariadb-10.11.11/mysql-test/suite/galera/disabled.def 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/disabled.def 2025-05-19 16:14:24.000000000 +0000 @@ -10,5 +10,7 @@ # ############################################################################## -galera_sequences : MDEV-35934/MDEV-33850 For Galera, create sequence with low cache got signal 6 error: [ERROR] WSREP: FSM: no such a transition REPLICATING -> COMMITTED -MDEV-26266 : MDEV-26266 +galera_wan : MDEV-35940 Unallowed state transition: donor -> synced in galera_wan +galera_vote_rejoin_ddl : MDEV-35940 Unallowed state transition: donor -> synced in galera_wan +MW-329 : MDEV-35951 Complete freeze during MW-329 test +galera_vote_rejoin_dml : MDEV-35964 Assertion `ist_seqno >= cc_seqno' failed in galera_vote_rejoin_dml diff -Nru mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes.cnf mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes.cnf --- mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -17,7 +17,7 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address=gcomm:// -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' @@ -28,7 +28,7 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.2.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.2.port wsrep_sst_receive_address='127.0.0.1:@mysqld.2.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes_as_master.cnf mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_master.cnf --- mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes_as_master.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_master.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -25,7 +25,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address=gcomm:// -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' 
wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' @@ -38,7 +38,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.2.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.2.port wsrep_sst_receive_address='127.0.0.1:@mysqld.2.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes_as_replica_2primary.cnf mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_replica_2primary.cnf --- mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes_as_replica_2primary.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_replica_2primary.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -24,7 +24,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address=gcomm:// -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' @@ -37,7 +37,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.2.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.2.port wsrep_sst_receive_address='127.0.0.1:@mysqld.2.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes_as_slave.cnf mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_slave.cnf --- mariadb-10.11.11/mysql-test/suite/galera/galera_2nodes_as_slave.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/galera_2nodes_as_slave.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -24,7 +24,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address=gcomm:// -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port 
wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' @@ -37,7 +37,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.2.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.2.port wsrep_sst_receive_address='127.0.0.1:@mysqld.2.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/galera_3nodes_as_slave.cnf mariadb-10.11.13/mysql-test/suite/galera/galera_3nodes_as_slave.cnf --- mariadb-10.11.11/mysql-test/suite/galera/galera_3nodes_as_slave.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/galera_3nodes_as_slave.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -24,7 +24,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address=gcomm:// -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' @@ -37,7 +37,7 @@ #sst_port=@OPT.port wsrep_provider=@ENV.WSREP_PROVIDER wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.2.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.2.port wsrep_sst_receive_address='127.0.0.1:@mysqld.2.#sst_port' @@ -50,7 +50,7 @@ #sst_port=@OPT.port wsrep-provider=@ENV.WSREP_PROVIDER wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.3.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.3.port wsrep_sst_receive_address='127.0.0.1:@mysqld.3.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/galera_4nodes.cnf mariadb-10.11.13/mysql-test/suite/galera/galera_4nodes.cnf --- mariadb-10.11.11/mysql-test/suite/galera/galera_4nodes.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/galera_4nodes.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -18,7 +18,7 @@ #ist_port=@OPT.port 
#sst_port=@OPT.port wsrep_cluster_address=gcomm:// -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' @@ -30,7 +30,7 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.2.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.2.port wsrep_sst_receive_address='127.0.0.1:@mysqld.2.#sst_port' @@ -42,7 +42,7 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.3.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.3.port wsrep_sst_receive_address='127.0.0.1:@mysqld.3.#sst_port' @@ -54,7 +54,7 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.4.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.4.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.4.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.4.port wsrep_sst_receive_address='127.0.0.1:@mysqld.4.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/include/auto_increment_offset_save.inc mariadb-10.11.13/mysql-test/suite/galera/include/auto_increment_offset_save.inc --- mariadb-10.11.11/mysql-test/suite/galera/include/auto_increment_offset_save.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/include/auto_increment_offset_save.inc 2025-05-19 16:14:24.000000000 +0000 @@ -42,4 +42,3 @@ --connection $node_4 let $auto_increment_offset_node_4 = `SELECT @@global.auto_increment_offset`; } - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/include/galera_dump_sr_table.inc mariadb-10.11.13/mysql-test/suite/galera/include/galera_dump_sr_table.inc --- mariadb-10.11.11/mysql-test/suite/galera/include/galera_dump_sr_table.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/include/galera_dump_sr_table.inc 2025-05-19 16:14:24.000000000 +0000 @@ -25,4 +25,3 @@ --inc $seqno } - 
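Note on the recurring wsrep_provider_options change in the galera_*nodes*.cnf hunks above: each node's option string now appends pc.wait_prim_timeout=PT60S (and pins gcache.size=10M), which bounds how long a node waits for a primary component before the test gives up. As a minimal illustration only, not part of this patch: the same provider option can be inspected and set at runtime from SQL, in the same way other tests in this diff adjust options such as pc.ignore_sb, assuming a running Galera node whose provider supports dynamic options:
SELECT @@global.wsrep_provider_options;  -- dump the effective provider options, including pc.wait_prim_timeout
SET GLOBAL wsrep_provider_options = 'pc.wait_prim_timeout=PT60S';  -- same value the cnf files now set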
diff -Nru mariadb-10.11.11/mysql-test/suite/galera/include/galera_st_shutdown_slave.inc mariadb-10.11.13/mysql-test/suite/galera/include/galera_st_shutdown_slave.inc --- mariadb-10.11.11/mysql-test/suite/galera/include/galera_st_shutdown_slave.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/include/galera_st_shutdown_slave.inc 2025-05-19 16:14:24.000000000 +0000 @@ -118,4 +118,3 @@ SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; DROP TABLE t1; COMMIT; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/include/galera_start_replication.inc mariadb-10.11.13/mysql-test/suite/galera/include/galera_start_replication.inc --- mariadb-10.11.11/mysql-test/suite/galera/include/galera_start_replication.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/include/galera_start_replication.inc 2025-05-19 16:14:24.000000000 +0000 @@ -41,9 +41,9 @@ my $counter = 1000; #my $found = false - + while ($counter > 0) { - + open(FILE, "$logfile") or die("Unable to open $logfile : $!\n"); my $new_sync_count = () = grep(/Synchronized with group/g,); close(FILE); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/include/galera_wsrep_recover.inc mariadb-10.11.13/mysql-test/suite/galera/include/galera_wsrep_recover.inc --- mariadb-10.11.11/mysql-test/suite/galera/include/galera_wsrep_recover.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/include/galera_wsrep_recover.inc 2025-05-19 16:14:24.000000000 +0000 @@ -9,14 +9,14 @@ } --perl - use strict; + use strict; my $wsrep_start_position_str = "grep -a 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'"; my $wsrep_start_position = `grep -a 'WSREP: Recovered position:' $ENV{MYSQL_TMP_DIR}/galera_wsrep_recover.log | sed 's/.*WSREP\:\ Recovered\ position://' | sed 's/^[ \t]*//'`; chomp($wsrep_start_position); die if $wsrep_start_position eq ''; - open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/galera_wsrep_start_position.inc") or die; + open(FILE, ">", "$ENV{MYSQL_TMP_DIR}/galera_wsrep_start_position.inc") or die; print FILE "--let \$galera_wsrep_start_position = $wsrep_start_position\n"; close FILE; EOF diff -Nru mariadb-10.11.11/mysql-test/suite/galera/include/wait_condition_with_debug_and_kill.inc mariadb-10.11.13/mysql-test/suite/galera/include/wait_condition_with_debug_and_kill.inc --- mariadb-10.11.11/mysql-test/suite/galera/include/wait_condition_with_debug_and_kill.inc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/include/wait_condition_with_debug_and_kill.inc 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,35 @@ +# include/wait_condition_with_debug_and_kill.inc +# +# SUMMARY +# +# Waits until the passed statement returns true, or the operation +# times out. If the operation times out, the additional error +# statement will be executed and the server is killed. +# +# USAGE +# +# let $wait_condition= +# SELECT c = 3 FROM t; +# let $wait_condition_on_error_output= select count(*) from t; +# [let $explicit_default_wait_timeout= N] # to override the default reset +# --source include/wait_condition_with_debug_and_kill.inc +# +# OR +# +# let $wait_timeout= 60; # Override default 30 seconds with 60.
+# let $wait_condition= +# SELECT c = 3 FROM t; +# let $wait_condition_on_error_output= select count(*) from t; +# --source include/wait_condition_with_debug_and_kill.inc +# --echo Executed the test condition $wait_condition_reps times +# +# +# EXAMPLE +# events_bugs.test, events_time_zone.test +# + +--source include/wait_condition_with_debug.inc +if (!$success) +{ + --source include/kill_galera.inc +} diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/GAL-401.result mariadb-10.11.13/mysql-test/suite/galera/r/GAL-401.result --- mariadb-10.11.11/mysql-test/suite/galera/r/GAL-401.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/GAL-401.result 2025-05-19 16:14:24.000000000 +0000 @@ -24,6 +24,6 @@ PRIMARY KEY (`f1`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci DROP TABLE t1; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender (.*) is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender .+ ?is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); connection node_1; SET GLOBAL wsrep_provider_options = 'pc.ignore_sb=false'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-20225.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-20225.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-20225.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-20225.result 2025-05-19 16:14:24.000000000 +0000 @@ -15,7 +15,7 @@ SET GLOBAL debug_dbug = 'RESET'; SET DEBUG_SYNC = 'now SIGNAL signal.mdev_20225_continue'; SET DEBUG_SYNC = 'RESET'; -SET GLOBAL wsrep_slave_threads = 1; +SET GLOBAL wsrep_slave_threads = DEFAULT; connection node_2; SHOW TRIGGERS; Trigger Event Table Statement Timing Created sql_mode Definer character_set_client collation_connection Database Collation diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-20793.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-20793.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-20793.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-20793.result 2025-05-19 16:14:24.000000000 +0000 @@ -41,4 +41,4 @@ ERROR 40001: Deadlock found when trying to get lock; try restarting transaction SET debug_sync = "RESET"; DROP TABLE t1; -SET GLOBAL wsrep_slave_threads = 1; +SET GLOBAL wsrep_slave_threads = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-21479.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-21479.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-21479.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-21479.result 2025-05-19 16:14:24.000000000 +0000 @@ -66,7 +66,7 @@ Variable_name Value wsrep_desync_count 0 SET @@global.wsrep_desync = 0; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender (.*) is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender .+ ?is not in state transfer \\(SYNCED\\)\\. 
Message ignored\\."); connection node_1; # Wait until both nodes are back to cluster SET GLOBAL wsrep_provider_options = 'pc.ignore_sb=false'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-25389.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-25389.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-25389.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-25389.result 2025-05-19 16:14:24.000000000 +0000 @@ -15,3 +15,4 @@ SELECT @@wsrep_slave_threads; @@wsrep_slave_threads 1 +connection node_2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-26266.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-26266.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-26266.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-26266.result 2025-05-19 16:14:24.000000000 +0000 @@ -19,5 +19,5 @@ INSERT INTO t2 VALUES (4); INSERT INTO t2 VALUES (5); CREATE VIEW v1 AS SELECT c1 FROM t1 WHERE c1 IN (SELECT a FROM t2) GROUP BY c1; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +DROP VIEW v1; DROP TABLE t1,t2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-33136.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-33136.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-33136.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-33136.result 2025-05-19 16:14:24.000000000 +0000 @@ -4,7 +4,7 @@ connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; connection node_1a; -TRUNCATE TABLE t1; +RENAME TABLE t1 TO tmp, tmp TO t1; SET SESSION wsrep_retry_autocommit = 0; SET DEBUG_SYNC = 'dict_stats_mdl_acquired SIGNAL may_toi WAIT_FOR bf_abort'; INSERT INTO t1 VALUES (1); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-34647.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-34647.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-34647.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-34647.result 2025-05-19 16:14:24.000000000 +0000 @@ -95,7 +95,6 @@ 4 d 5 d 6 d -set global wsrep_mode=default; connection node_1; drop table t1,t2,t3,t4,t5; set global wsrep_mode=default; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-35748.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-35748.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-35748.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-35748.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,31 @@ +connection node_2; +connection node_1; +connection node_1; +INSTALL PLUGIN IF NOT EXISTS connect SONAME 'ha_connect'; +CREATE TABLE t1 (f INT) ENGINE=CONNECT; +Warnings: +Warning 1105 No table_type. Will be set to DOS +Warning 1105 No file name. 
Table will use t1.dos +CREATE TABLE t2 (f INT) ENGINE=ROCKSDB; +CREATE TABLE t3 (f INT) ENGINE=SEQUENCE; +ERROR 42000: This version of MariaDB doesn't yet support 'non-InnoDB sequences in Galera cluster' +show warnings; +Level Code Message +Error 1235 This version of MariaDB doesn't yet support 'non-InnoDB sequences in Galera cluster' +Note 1235 ENGINE=SEQUENCE not supported by Galera +connection node_2; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `f` int(11) DEFAULT NULL +) ENGINE=CONNECT DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `f` int(11) DEFAULT NULL +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +show create table t3; +ERROR 42S02: Table 'test.t3' doesn't exist +connection node_1; +DROP TABLE t1, t2; +UNINSTALL PLUGIN IF EXISTS connect; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-35946.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-35946.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-35946.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-35946.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,16 @@ +connection node_2; +connection node_1; +connection node_1; +connection node_2; +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=1'; +SET SESSION wsrep_sync_wait=0; +SET SESSION wsrep_sync_wait=DEFAULT; +DELETE FROM mysql.wsrep_streaming_log; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +SET SESSION wsrep_sync_wait=0; +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=0'; +SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +VARIABLE_VALUE +Primary +SET SESSION wsrep_sync_wait=DEFAULT; +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender .+ ?is not in state transfer \\(SYNCED\\)\\. 
Message ignored\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-36116.result mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-36116.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MDEV-36116.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MDEV-36116.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,22 @@ +connection node_2; +connection node_1; +connect con1,127.0.0.1,root,,test,$NODE_MYPORT_1; +connection node_1; +CALL mtr.add_suppression("CREATE TABLE isolation failure"); +SET DEBUG_SYNC = 'wsrep_kill_thd_before_enter_toi SIGNAL may_kill WAIT_FOR continue'; +CREATE TABLE t1 (a INT) ENGINE=InnoDB; +connection con1; +SET DEBUG_SYNC = 'now WAIT_FOR may_kill'; +SET DEBUG_SYNC = 'now SIGNAL continue'; +connection node_1; +Got one of the listed errors +connection node_2; +SHOW TABLES LIKE 't1'; +Tables_in_test (t1) +connection con1; +SHOW TABLES LIKE 't1'; +Tables_in_test (t1) +SET DEBUG_SYNC = 'RESET'; +disconnect con1; +disconnect node_2; +disconnect node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MW-284.result mariadb-10.11.13/mysql-test/suite/galera/r/MW-284.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MW-284.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MW-284.result 2025-05-19 16:14:24.000000000 +0000 @@ -13,7 +13,7 @@ SELECT @@wsrep_on; @@wsrep_on 0 -call mtr.add_suppression("Error reading packet from server: WSREP has not yet prepared node for application use (server_errno=1047)"); +call mtr.add_suppression("Error reading packet from server: WSREP has not yet prepared node for application use \\(server_errno ?= ?1047\\)"); START SLAVE; include/wait_for_slave_param.inc [Slave_IO_Running] connection node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MW-329.result mariadb-10.11.13/mysql-test/suite/galera/r/MW-329.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MW-329.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MW-329.result 2025-05-19 16:14:24.000000000 +0000 @@ -18,5 +18,6 @@ connection node_1; DROP PROCEDURE proc_insert; DROP TABLE t1; +disconnect node_1b; CALL mtr.add_suppression("WSREP: .* conflict state after post commit "); set global innodb_status_output=Default; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MW-329F.result mariadb-10.11.13/mysql-test/suite/galera/r/MW-329F.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MW-329F.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MW-329F.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,25 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (f1 INTEGER, f2 CHAR(20) DEFAULT 'abc') ENGINE=InnoDB; +INSERT INTO t1 (f1) VALUES (1),(65535); +CREATE PROCEDURE proc_insert (repeat_count int) +BEGIN +DECLARE current_num int; +DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; +SET current_num = 0; +SET SESSION wsrep_sync_wait = 0; +WHILE current_num < repeat_count do +INSERT INTO t1 (f1) VALUES (FLOOR( 1 + RAND( ) * 65535 )); +SELECT SLEEP(0.1); +SET current_num = current_num + 1; +END WHILE; +END| +connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1b; +connection node_1b; +connection node_1; +DROP PROCEDURE proc_insert; +DROP TABLE t1; +disconnect node_1b; +CALL mtr.add_suppression("WSREP: .* conflict state after post commit "); +set global innodb_status_output=Default; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/MW-416.result 
mariadb-10.11.13/mysql-test/suite/galera/r/MW-416.result --- mariadb-10.11.11/mysql-test/suite/galera/r/MW-416.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/MW-416.result 2025-05-19 16:14:24.000000000 +0000 @@ -20,13 +20,13 @@ Got one of the listed errors CREATE DATABASE db; Got one of the listed errors -CREATE EVENT ev1 ON SCHEDULE AT CURRENT_TIMESTAMP DO SELECT 1; +CREATE EVENT ev1 ON SCHEDULE AT CURRENT_TIMESTAMP DO SELECT 1; Got one of the listed errors CREATE FUNCTION fun1() RETURNS int RETURN(1); Got one of the listed errors CREATE FUNCTION fun1 RETURNS STRING SONAME 'funlib.so'; Got one of the listed errors -CREATE PROCEDURE proc1() BEGIN END; +CREATE PROCEDURE proc1() BEGIN END; Got one of the listed errors CREATE INDEX idx ON tbl(id); Got one of the listed errors @@ -100,3 +100,4 @@ performance_schema sys test +disconnect userMW416; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_2primary_replica.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_2primary_replica.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_2primary_replica.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_2primary_replica.result 2025-05-19 16:14:24.000000000 +0000 @@ -13,10 +13,13 @@ connect replica, 127.0.0.1, root, , test, $NODE_MYPORT_1; connection replica; connection node_2; +connection primary1; +connection primary2; connection replica; # Galera replica changing master to primary1 -SET @@default_master_connection='stream2'; +SET @@default_master_connection='stream1'; # Primary node changing master to primary2 +SET @@default_master_connection='stream2'; START ALL SLAVES; Warnings: Note 1937 SLAVE 'stream1' started diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_alter_engine_myisam.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_alter_engine_myisam.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_alter_engine_myisam.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_alter_engine_myisam.result 2025-05-19 16:14:24.000000000 +0000 @@ -26,3 +26,4 @@ 1 DROP TABLE t1; connection node_1; +SET GLOBAL wsrep_mode = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_applier_ftwrl_table_alter.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_applier_ftwrl_table_alter.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_applier_ftwrl_table_alter.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_applier_ftwrl_table_alter.result 2025-05-19 16:14:24.000000000 +0000 @@ -13,7 +13,7 @@ SELECT 1 FROM DUAL; 1 1 -SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock'; +SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); COUNT(*) = 1 1 UNLOCK TABLES; @@ -25,7 +25,7 @@ `f2` int(11) DEFAULT NULL, PRIMARY KEY (`f1`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock'; +SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); COUNT(*) = 0 1 DROP TABLE t1; diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/r/galera_as_slave_nonprim.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_as_slave_nonprim.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_as_slave_nonprim.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_as_slave_nonprim.result 2025-05-19 16:14:24.000000000 +0000 @@ -12,7 +12,6 @@ connection node_4; INSERT INTO t1 VALUES (1),(2),(3),(4),(5); connection node_2; -connection node_1; expected_error 1 connection node_2; @@ -27,7 +26,7 @@ RESET SLAVE ALL; CALL mtr.add_suppression("Slave SQL: Error 'Unknown command' on query"); CALL mtr.add_suppression("Slave: Unknown command Error_code: 1047"); -CALL mtr.add_suppression("Transport endpoint is not connected"); +CALL mtr.add_suppression("(Transport endpoint|Socket) is not connected"); CALL mtr.add_suppression("Slave SQL: Error in Xid_log_event: Commit could not be completed, 'Deadlock found when trying to get lock; try restarting transaction', Error_code: 1213"); CALL mtr.add_suppression("Slave SQL: Node has dropped from cluster, Error_code: 1047"); connection node_4; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_abort_group_commit.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_group_commit.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_abort_group_commit.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_group_commit.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,685 +0,0 @@ -SET SESSION wsrep_sync_wait = 0; -galera_sr_bf_abort_at_commit = 0 -after_replicate_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,after_replicate_sync'; -INSERT INTO t1 VALUES (3); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=after_replicate_sync'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -local_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; 
-INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,local_monitor_master_enter_sync'; -INSERT INTO t1 VALUES (3); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=local_monitor_master_enter_sync'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -apply_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_master_enter_sync'; -INSERT INTO t1 VALUES (3); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_master_enter_sync'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -commit_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; -INSERT INTO t1 VALUES (3); -SET SESSION wsrep_on = 0; -SET SESSION 
wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -galera_sr_bf_abort_at_commit = 1 -after_replicate_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,after_replicate_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=after_replicate_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -local_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,local_monitor_master_enter_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL 
wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=local_monitor_master_enter_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -apply_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_master_enter_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_master_enter_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -commit_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 1; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 
'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -galera_sr_bf_abort_at_commit = 1 -after_replicate_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 0; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,after_replicate_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=after_replicate_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -local_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 0; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,local_monitor_master_enter_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=local_monitor_master_enter_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * 
FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -apply_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 0; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_master_enter_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_master_enter_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -commit_monitor_master_enter_sync -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; -SET SESSION wsrep_trx_fragment_size = 0; -SET AUTOCOMMIT=OFF; -INSERT INTO t1 VALUES (1); -SELECT * FROM t1 FOR UPDATE; -f1 -1 -SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; -SET AUTOCOMMIT=ON; -INSERT INTO t1 VALUES (2); -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; -COMMIT; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; -SET SESSION wsrep_on = 0; -SET SESSION wsrep_on = 1; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; -ROLLBACK; -SET GLOBAL wsrep_provider_options = 'dbug='; -SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT * FROM t1; -f1 -1 -2 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 2; -COUNT(*) = 1 -1 -SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SELECT COUNT(*) = 
0 FROM mysql.wsrep_streaming_log; -COUNT(*) = 0 -1 -SET AUTOCOMMIT=ON; -SET SESSION wsrep_trx_fragment_size = 0; -DELETE FROM t1; -DROP TABLE t1; -CALL mtr.add_suppression("WSREP: fragment replication failed: 1"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_abort_lock_table.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_lock_table.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_abort_lock_table.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_lock_table.result 2025-05-19 16:14:24.000000000 +0000 @@ -7,6 +7,7 @@ connection node_1; INSERT INTO t1 VALUES (2); connection node_2; +SET SESSION wsrep_sync_wait = 0; UNLOCK TABLES; COMMIT; SELECT COUNT(*) = 1 FROM t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_abort_mariabackup.result 2025-05-19 16:14:24.000000000 +0000 @@ -53,7 +53,7 @@ FOUND 1 /Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used./ in mysqld.2.err # Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT FOUND 1 /Server desynched from group during BACKUP STAGE BLOCK_COMMIT./ in mysqld.2.err -SET GLOBAL wsrep_mode = ""; +SET GLOBAL wsrep_mode = DEFAULT; connection node_1; DROP TABLE t; disconnect node_2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_kill,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_kill,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_kill,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_kill,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ ---- a/home/panda/mariadb-10.5/mysql-test/suite/galera/r/galera_bf_kill.result -+++ b/home/panda/mariadb-10.5/mysql-test/suite/galera/r/galera_bf_kill.reject +--- r/galera_bf_kill.result ++++ r/galera_bf_kill,debug.reject @@ -77,4 +77,34 @@ a b 5 2 disconnect node_2a; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_kill_debug.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_kill_debug.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_bf_kill_debug.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_bf_kill_debug.result 2025-05-19 16:14:24.000000000 +0000 @@ -40,18 +40,19 @@ disconnect node_2a; connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; connection node_2a; -CREATE TABLE t1 (i int primary key); +CREATE TABLE t1 (i int primary key) engine=innodb; SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; INSERT INTO t1 VALUES (1); connection node_2; SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached"; SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; -SET DEBUG_SYNC='RESET'; connection node_2a; connection node_2; +SET DEBUG_SYNC='RESET'; select * from t1; i 1 disconnect node_2a; +disconnect node_2b; connection node_1; drop table t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_binlog_checksum.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_binlog_checksum.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_binlog_checksum.result 2025-01-30 11:01:23.000000000 +0000 +++ 
mariadb-10.11.13/mysql-test/suite/galera/r/galera_binlog_checksum.result 2025-05-19 16:14:24.000000000 +0000 @@ -27,4 +27,5 @@ 1 connection node_1; DROP TABLE t1; +SET @@global.wsrep_mode=DEFAULT; # End of tests. diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_binlog_stmt_autoinc.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_binlog_stmt_autoinc.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_binlog_stmt_autoinc.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_binlog_stmt_autoinc.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,11 +1,11 @@ connection node_2; connection node_1; connection node_1; -SET GLOBAL auto_increment_offset=1; connection node_2; -SET GLOBAL auto_increment_offset=2; connection node_1; +SET GLOBAL auto_increment_offset=1; connection node_2; +SET GLOBAL auto_increment_offset=2; connection node_2; SET GLOBAL wsrep_forced_binlog_format='STATEMENT'; connection node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_circular_replication.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_circular_replication.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_circular_replication.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_circular_replication.result 2025-05-19 16:14:24.000000000 +0000 @@ -12,6 +12,7 @@ connection replica1; connection node_2; connection primary2; +connection primary1; connection replica1; # Galera replica changing master to primary1 START SLAVE; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ddl_fk_conflict.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_ddl_fk_conflict.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ddl_fk_conflict.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ddl_fk_conflict.result 2025-05-19 16:14:24.000000000 +0000 @@ -298,6 +298,7 @@ ###################################################################### connection node_1; SET SESSION wsrep_sync_wait=0; +FLUSH STATUS; CREATE TABLE p1 (pk INTEGER PRIMARY KEY, f2 CHAR(30)); INSERT INTO p1 VALUES (1, 'INITIAL VALUE'); CREATE TABLE p2 (pk INTEGER PRIMARY KEY, f2 CHAR(30)); @@ -491,6 +492,7 @@ ###################################################################### connection node_1; SET SESSION wsrep_sync_wait=0; +FLUSH STATUS; CREATE TABLE p1 (pk INTEGER PRIMARY KEY, f2 CHAR(30)); INSERT INTO p1 VALUES (1, 'INITIAL VALUE'); CREATE TABLE p2 (pk INTEGER PRIMARY KEY, f2 CHAR(30)); @@ -684,6 +686,7 @@ ###################################################################### connection node_1; SET SESSION wsrep_sync_wait=0; +FLUSH STATUS; CREATE TABLE p1 (pk INTEGER PRIMARY KEY, f2 CHAR(30)); INSERT INTO p1 VALUES (1, 'INITIAL VALUE'); CREATE TABLE p2 (pk INTEGER PRIMARY KEY, f2 CHAR(30)); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_defaults.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_defaults.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_defaults.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_defaults.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,9 @@ connection node_2; connection node_1; # Correct Galera library found +SELECT COUNT(*) `expect 51` FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME LIKE 'wsrep_%'; +expect 51 +51 SELECT VARIABLE_NAME, VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME LIKE 'wsrep_%' diff 
-Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_gcs_fragment.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_gcs_fragment.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_gcs_fragment.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_gcs_fragment.result 2025-05-19 16:14:24.000000000 +0000 @@ -22,7 +22,7 @@ connection node_1a; SET GLOBAL wsrep_provider_options = 'signal=gcs_core_after_frag_send'; connection node_1; -ERROR HY000: Got error 6 "No such device or address" during COMMIT +ERROR HY000: Error while appending streaming replication fragment(provider status: Not connected to Primary Component) INSERT INTO t1 VALUES (3, "cccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); SELECT * FROM t1; f1 f2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_inject_bf_long_wait.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_inject_bf_long_wait.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_inject_bf_long_wait.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_inject_bf_long_wait.result 2025-05-19 16:14:24.000000000 +0000 @@ -3,8 +3,11 @@ CREATE TABLE t1(id int not null primary key, b int) engine=InnoDB; INSERT INTO t1 VALUES (0,0),(1,1),(2,2),(3,3); BEGIN; +SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked'; UPDATE t1 set b = 100 where id between 1 and 2;; connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET DEBUG_SYNC = 'now WAIT_FOR blocked'; +SET DEBUG_SYNC = 'wsrep_after_statement_enter CLEAR'; connection node_1b; SET @save_dbug = @@SESSION.debug_dbug; SET @@SESSION.innodb_lock_wait_timeout=2; @@ -20,5 +23,6 @@ 1 100 2 100 3 3 +SET DEBUG_SYNC = 'RESET'; disconnect node_1b; DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_MDEV-28423,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ ---- suite/galera/r/galera_ist_MDEV-28423.result 2022-06-13 09:40:33.073863796 +0300 -+++ suite/galera/r/galera_ist_MDEV-28423.reject 2022-06-13 09:58:59.936874991 +0300 +--- r/galera_ist_MDEV-28423.result ++++ r/galera_ist_MDEV-28423,debug.reject @@ -517,3 +517,187 @@ 1 DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_MDEV-28583,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ ---- suite/galera/r/galera_ist_MDEV-28583.result 2022-06-11 10:48:16.875034382 +0300 -+++ suite/galera/r/galera_ist_MDEV-28583,debug.reject 2022-06-11 11:25:55.616481509 +0300 +--- r/galera_ist_MDEV-28583.result ++++ r/galera_ist_MDEV-28583,debug.reject @@ -517,3 +517,187 @@ 1 DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_mysqldump,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_mysqldump,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ 
mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,13 +1,12 @@ --- r/galera_ist_mysqldump.result +++ r/galera_ist_mysqldump,debug.reject -@@ -354,11 +354,195 @@ +@@ -354,6 +354,190 @@ 1 DROP TABLE t1; COMMIT; +Performing State Transfer on a server that has been killed and restarted +while a DDL was in progress on it - connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); ++connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; @@ -189,12 +188,6 @@ +DROP TABLE t1; +COMMIT; +SET GLOBAL debug_dbug = $debug_orig; -+connection node_1; -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); + connection node_1; + CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_mysqldump,release.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump,release.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_mysqldump,release.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump,release.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ ---- r/galera_ist_mysqldump.result -+++ r/galera_ist_mysqldump.reject -@@ -355,10 +355,10 @@ - DROP TABLE t1; - COMMIT; - connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_mysqldump.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ist_mysqldump.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ist_mysqldump.result 
2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,8 @@ connection node_2; connection node_1; +call mtr.add_suppression("WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to "); +connection node_1; +connection node_2; Setting SST method to mysqldump ... call mtr.add_suppression("WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to '127\\.0\\.0\\.1'"); call mtr.add_suppression("Failed to load slave replication state from table mysql\\.gtid_slave_pos"); @@ -9,9 +12,6 @@ SET GLOBAL wsrep_sst_auth = 'sst:'; connection node_2; SET GLOBAL wsrep_sst_method = 'mysqldump'; -call mtr.add_suppression("WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to "); -connection node_1; -connection node_2; Performing State Transfer on a server that has been shut down cleanly and restarted connection node_1; CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; @@ -355,10 +355,10 @@ DROP TABLE t1; COMMIT; connection node_1; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; connection node_2; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); CALL mtr.add_suppression("Can't open and lock time zone table"); CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_nonPK_and_PA.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_nonPK_and_PA.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_nonPK_and_PA.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_nonPK_and_PA.result 2025-05-19 16:14:24.000000000 +0000 @@ -8,7 +8,7 @@ SET SESSION wsrep_sync_wait = 0; SET GLOBAL wsrep_slave_threads = 2; *************************************************************** -scenario 1, conflicting UPDATE +scenario 1, conflicting UPDATE *************************************************************** SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync'; connection node_1; @@ -31,7 +31,7 @@ SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync'; SET GLOBAL wsrep_provider_options = 'dbug='; *************************************************************** -scenario 2, conflicting DELETE +scenario 2, conflicting DELETE *************************************************************** SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync'; connection node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_parallel_apply_lock_table.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_parallel_apply_lock_table.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_parallel_apply_lock_table.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_parallel_apply_lock_table.result 2025-05-19 16:14:24.000000000 +0000 @@ -10,10 +10,10 @@ INSERT INTO t2 VALUES (1); 
connection node_2a; SET SESSION wsrep_sync_wait=0; -SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE (STATE LIKE 'Commit' or STATE = 'Waiting for certification'); +SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE '%committing%' OR STATE LIKE 'Commit' OR STATE LIKE 'Waiting for certification'); EXPECT_1 1 -SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE '%Waiting for table metadata lock%'; +SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); EXPECT_1 1 SELECT COUNT(*) AS EXPECT_0 FROM t1; @@ -32,9 +32,8 @@ SELECT COUNT(*) AS EXPECT_1 FROM t2; EXPECT_1 1 -SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE (STATE LIKE '%committed%' or STATE = 'Waiting for certification'); +SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE '%committed%' OR STATE LIKE 'Waiting for certification'); EXPECT_2 2 -SET GLOBAL wsrep_slave_threads = 1;; DROP TABLE t1; DROP TABLE t2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_parallel_simple.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_parallel_simple.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_parallel_simple.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_parallel_simple.result 2025-05-19 16:14:24.000000000 +0000 @@ -34,6 +34,5 @@ SELECT COUNT(*) as expect_20 FROM t2; expect_20 20 -SET GLOBAL wsrep_slave_threads = 1;; DROP TABLE t1; DROP TABLE t2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_partitioned_tables.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_partitioned_tables.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_partitioned_tables.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_partitioned_tables.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,176 @@ +connection node_2; +connection node_1; +call mtr.add_suppression("WSREP: wsrep_mode = STRICT_REPLICATION enabled\\. 
Storage engine partition for table"); +# wsrep-mode= DEFAULT +SET GLOBAL wsrep_mode = ""; +SELECT @@wsrep_mode; +@@wsrep_mode + +CREATE OR REPLACE TABLE t1 (v1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB +PARTITION BY KEY (v1) +PARTITIONS 2; +CREATE OR REPLACE TABLE t2 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM +PARTITION BY KEY (v1) +PARTITIONS 2; +ALTER TABLE t1 ADD COLUMN v2 int; +ALTER TABLE t2 ADD COLUMN v2 int; +INSERT INTO t1 VALUES (1,1),(2,2); +INSERT INTO t2 VALUES (1,1),(2,2); +ALTER TABLE t1 ADD COLUMN v3 int, ENGINE=MyISAM; +ALTER TABLE t2 ADD COLUMN v3 int, ENGINE=Aria; +UPDATE t1 SET v3 = 3; +UPDATE t2 SET v3 = 3; +CREATE INDEX xx1 ON t1(v2); +CREATE INDEX xx2 ON t2(v2); +DROP INDEX xx1 ON t1; +DROP INDEX xx2 ON t2; +TRUNCATE TABLE t1; +TRUNCATE TABLE t2; +RENAME TABLE t1 TO t1_v2; +RENAME TABLE t2 TO t2_v2; +CREATE VIEW x1 AS SELECT * FROM t1_v2; +CREATE VIEW x2 AS SELECT * FROM t2_v2; +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t1 +AFTER INSERT ON t1_v2 FOR EACH ROW +UPDATE t1_v2 SET t1_v2.v3 = t1_v2.v3+1; +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t2 +AFTER INSERT ON t2_v2 FOR EACH ROW +UPDATE t2_v2 SET t2_v2.v3 = t2_v2.v3+1; +connection node_2; +SHOW CREATE TABLE t1_v2; +Table Create Table +t1_v2 CREATE TABLE `t1_v2` ( + `v1` int(11) NOT NULL, + `v2` int(11) DEFAULT NULL, + `v3` int(11) DEFAULT NULL, + PRIMARY KEY (`v1`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci + PARTITION BY KEY (`v1`) +PARTITIONS 2 +SHOW CREATE TABLE t2_v2; +Table Create Table +t2_v2 CREATE TABLE `t2_v2` ( + `v1` int(11) NOT NULL, + `v2` int(11) DEFAULT NULL, + `v3` int(11) DEFAULT NULL, + PRIMARY KEY (`v1`) +) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci + PARTITION BY KEY (`v1`) +PARTITIONS 2 +SHOW CREATE VIEW x1; +View Create View character_set_client collation_connection +x1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `x1` AS select `t1_v2`.`v1` AS `v1`,`t1_v2`.`v2` AS `v2`,`t1_v2`.`v3` AS `v3` from `t1_v2` latin1 latin1_swedish_ci +SHOW CREATE VIEW x2; +View Create View character_set_client collation_connection +x2 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `x2` AS select `t2_v2`.`v1` AS `v1`,`t2_v2`.`v2` AS `v2`,`t2_v2`.`v3` AS `v3` from `t2_v2` latin1 latin1_swedish_ci +SELECT * FROM t1_v2; +v1 v2 v3 +SELECT * FROM t2_v2; +v1 v2 v3 +connection node_1; +DROP VIEW x1; +DROP VIEW x2; +DROP TRIGGER increment_before_t1; +DROP TRIGGER increment_before_t2; +DROP TABLE t1_v2; +DROP TABLE t2_v2; +SET GLOBAL wsrep_mode = ""; +CREATE OR REPLACE TABLE t2 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM +PARTITION BY KEY (v1) +PARTITIONS 2; +# wsrep-mode= STRICT_REPLICATION +SET GLOBAL wsrep_mode = "STRICT_REPLICATION"; +SELECT @@wsrep_mode; +@@wsrep_mode +STRICT_REPLICATION +CREATE OR REPLACE TABLE t1 (v1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB +PARTITION BY KEY (v1) +PARTITIONS 2; +CREATE OR REPLACE TABLE t3 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM +PARTITION BY KEY (v1) +PARTITIONS 2; +ERROR HY000: Galera replication not supported +ALTER TABLE t1 ADD COLUMN v2 int; +ALTER TABLE t2 ADD COLUMN v2 int; +ERROR HY000: Galera replication not supported +INSERT INTO t1 VALUES (1,1),(2,2); +Warnings: +Warning 1290 WSREP: wsrep_mode = STRICT_REPLICATION enabled. Storage engine partition for table 'test'.'t1' is not supported in Galera +INSERT INTO t2 VALUES (1),(2); +Warnings: +Warning 1290 WSREP: wsrep_mode = STRICT_REPLICATION enabled. 
Storage engine partition for table 'test'.'t2' is not supported in Galera +ALTER TABLE t1 ADD COLUMN v3 int, ENGINE=MyISAM; +ERROR HY000: Galera replication not supported +ALTER TABLE t2 ADD COLUMN v3 int, ENGINE=Aria; +ERROR HY000: Galera replication not supported +UPDATE t1 SET v2 = v2 + 3; +Warnings: +Warning 1290 WSREP: wsrep_mode = STRICT_REPLICATION enabled. Storage engine partition for table 'test'.'t1' is not supported in Galera +UPDATE t2 SET v1 = v1 + 3; +Warnings: +Warning 1290 WSREP: wsrep_mode = STRICT_REPLICATION enabled. Storage engine partition for table 'test'.'t2' is not supported in Galera +CREATE INDEX xx1 ON t1(v2); +CREATE INDEX xx2 ON t2(v2); +ERROR HY000: Galera replication not supported +DROP INDEX xx1 ON t1; +DROP INDEX xx2 on t2; +ERROR HY000: Galera replication not supported +TRUNCATE TABLE t1; +TRUNCATE TABLE t2; +ERROR HY000: Galera replication not supported +RENAME TABLE t1 TO t1_v2; +RENAME TABLE t2 TO t2_v2; +RENAME TABLE t2_v2 TO t2; +CREATE VIEW x1 AS SELECT * FROM t1_v2; +CREATE VIEW x2 AS SELECT * FROM t2; +ERROR HY000: Galera replication not supported +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t1 +AFTER INSERT ON t1_v2 FOR EACH ROW +UPDATE t1_v2 SET t1_v2.v2 = t1_v2.v2+1; +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t2 +AFTER INSERT ON t2 FOR EACH ROW +UPDATE t2 SET t2.v1 = t2.v1+1; +ERROR HY000: Galera replication not supported +connection node_2; +SHOW CREATE TABLE t1_v2; +Table Create Table +t1_v2 CREATE TABLE `t1_v2` ( + `v1` int(11) NOT NULL, + `v2` int(11) DEFAULT NULL, + PRIMARY KEY (`v1`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci + PARTITION BY KEY (`v1`) +PARTITIONS 2 +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `v1` int(11) NOT NULL, + `v2` int(11) DEFAULT NULL, + PRIMARY KEY (`v1`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci + PARTITION BY KEY (`v1`) +PARTITIONS 2 +SHOW CREATE VIEW x1; +View Create View character_set_client collation_connection +x1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `x1` AS select `t1_v2`.`v1` AS `v1`,`t1_v2`.`v2` AS `v2` from `t1_v2` latin1 latin1_swedish_ci +SELECT * FROM t1_v2; +v1 v2 +SELECT * FROM t2; +v1 v2 +connection node_1; +DROP VIEW x1; +DROP TRIGGER increment_before_t1; +DROP TABLE t1_v2; +DROP TABLE t2; +SET GLOBAL wsrep_mode = ""; +CREATE OR REPLACE TABLE t2 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM +PARTITION BY KEY (v1) +PARTITIONS 2; +# wsrep-mode= STRICT_REPLICATION +SET GLOBAL wsrep_mode = "STRICT_REPLICATION"; +SELECT @@wsrep_mode; +@@wsrep_mode +STRICT_REPLICATION +ALTER TABLE t2 ENGINE=InnoDB; +DROP TABLE t2; +SET GLOBAL wsrep_mode = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_restart_replica.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_restart_replica.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_restart_replica.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_restart_replica.result 2025-05-19 16:14:24.000000000 +0000 @@ -7,6 +7,7 @@ ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; connection node_1; connection replica; +connection primary; connection replica; START SLAVE; connection primary; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequence_engine.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequence_engine.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequence_engine.result 2025-01-30 11:01:23.000000000 +0000 
+++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequence_engine.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,10 @@ connection node_2; connection node_1; +connection node_2; +SET GLOBAL wsrep_ignore_apply_errors=0; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +SET SESSION wsrep_sync_wait=0; SET GLOBAL wsrep_ignore_apply_errors=0; SET SESSION AUTOCOMMIT=0; SET SESSION max_error_count=0; @@ -8,5 +13,4 @@ connection node_2; SHOW CREATE TABLE t0; ERROR 42S02: Table 'test.t0' doesn't exist -connection node_1; -SET GLOBAL wsrep_ignore_apply_errors=DEFAULT; +disconnect node_2a; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences,binlogoff.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences,binlogoff.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences,binlogoff.rdiff 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences,binlogoff.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,11 @@ +--- r/galera_sequences.result ++++ r/galera_sequences,binlogoff.reject +@@ -313,7 +313,7 @@ + 7 4 + SELECT NEXTVAL(t); + NEXTVAL(t) +-42 ++2 + connection node_1; + DROP TABLE t1; + DROP SEQUENCE t; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences.result 2025-05-19 16:14:24.000000000 +0000 @@ -47,6 +47,9 @@ NEXT VALUE FOR Seq1_1 4 connection node_1; +SHOW CREATE SEQUENCE Seq1_1; +Table Create Table +Seq1_1 CREATE SEQUENCE `Seq1_1` start with 1 minvalue 1 maxvalue 9223372036854775806 increment by 1 nocache nocycle ENGINE=InnoDB DROP SEQUENCE Seq1_1; connection node_1; CREATE TABLE t2 (d CHAR(1)KEY); @@ -279,6 +282,9 @@ connection node_1; DROP TABLE t1; DROP SEQUENCE t; +connection node_2; +SET SESSION wsrep_sync_wait=15; +connection node_1; CREATE SEQUENCE t INCREMENT BY 0 CACHE=20 ENGINE=INNODB; CREATE TABLE t1(a int not null primary key default nextval(t), b int) engine=innodb; BEGIN; @@ -324,4 +330,14 @@ ALTER SEQUENCE IF EXISTS t MINVALUE=1; ERROR 42000: This version of MariaDB doesn't yet support 'CACHE without INCREMENT BY 0 in Galera cluster' DROP TABLE t; + +MDEV-32631: + +CREATE OR REPLACE TABLE t1(c INT ) ENGINE=ARIA; +SET SESSION WSREP_OSU_METHOD=RSU; +INSERT INTO t1 SELECT seq,concat(seq,1) FROM seq_1_to_100; +ERROR 42000: This version of MariaDB doesn't yet support 'RSU on this table engine' +SET SESSION WSREP_OSU_METHOD=TOI; +DROP TABLE t1; + End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences_bf_kill.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences_bf_kill.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences_bf_kill.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences_bf_kill.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,152 @@ +connection node_2; +connection node_1; +connection node_1; +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1, 0), (3, 0); +connection node_1; +START TRANSACTION; +INSERT INTO t1 VALUES (4, next value for s); +INSERT INTO t1 VALUES (5, next value for s); +INSERT INTO t1 VALUES (6, next value for s); +INSERT INTO t1 VALUES (7, next value for s); +INSERT 
INTO t1 VALUES (8, next value for s); +INSERT INTO t1 VALUES (9, next value for s); +INSERT INTO t1 VALUES (10, next value for s); +INSERT INTO t1 VALUES (11, next value for s); +INSERT INTO t1 VALUES (12, next value for s); +INSERT INTO t1 VALUES (13, next value for s); +INSERT INTO t1 VALUES (14, next value for s); +SELECT * FROM t1 WHERE f1 > 0 FOR UPDATE; +f1 f2 +1 0 +3 0 +4 1 +5 3 +6 5 +7 7 +8 9 +9 11 +10 13 +11 15 +12 17 +13 19 +14 21 +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET SESSION wsrep_sync_wait=0; +SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; +connection node_2; +INSERT INTO t1 VALUES (2, 2); +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync'; +connection node_1; +COMMIT; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'dbug=d,abort_trx_end'; +SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=abort_trx_end'; +SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync'; +connection node_1; +wsrep_local_replays +1 +INSERT INTO t1 VALUES (22, next value for s); +INSERT INTO t1 VALUES (23, next value for s); +INSERT INTO t1 VALUES (24, next value for s); +INSERT INTO t1 VALUES (25, next value for s); +INSERT INTO t1 VALUES (26, next value for s); +INSERT INTO t1 VALUES (27, next value for s); +INSERT INTO t1 VALUES (28, next value for s); +INSERT INTO t1 VALUES (29, next value for s); +INSERT INTO t1 VALUES (30, next value for s); +INSERT INTO t1 VALUES (31, next value for s); +INSERT INTO t1 VALUES (32, next value for s); +INSERT INTO t1 VALUES (33, next value for s); +INSERT INTO t1 VALUES (34, next value for s); +INSERT INTO t1 VALUES (35, next value for s); +connection node_1; +SELECT * FROM t1; +f1 f2 +1 0 +2 2 +3 0 +4 1 +5 3 +6 5 +7 7 +8 9 +9 11 +10 13 +11 15 +12 17 +13 19 +14 21 +22 31 +23 33 +24 35 +25 37 +26 39 +27 41 +28 43 +29 45 +30 47 +31 49 +32 51 +33 53 +34 55 +35 57 +SELECT LASTVAL(s); +LASTVAL(s) +57 +connection node_2; +SELECT * FROM t1; +f1 f2 +1 0 +2 2 +3 0 +4 1 +5 3 +6 5 +7 7 +8 9 +9 11 +10 13 +11 15 +12 17 +13 19 +14 21 +22 31 +23 33 +24 35 +25 37 +26 39 +27 41 +28 43 +29 45 +30 47 +31 49 +32 51 +33 53 +34 55 +35 57 +SELECT LASTVAL(s); +LASTVAL(s) +NULL +connection node_1; +SELECT NEXTVAL(s); +NEXTVAL(s) +59 +connection node_2; +SELECT NEXTVAL(s); +NEXTVAL(s) +62 +DROP SEQUENCE s; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences_transaction.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences_transaction.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sequences_transaction.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sequences_transaction.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,350 @@ +connection node_2; +connection node_1; +connection node_1; +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY DEFAULT NEXTVAL(s), f2 INT) ENGINE=InnoDB; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_1; +BEGIN; +INSERT INTO t1(f2) 
values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; +connection node_2; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; +connection node_2a; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; +connection node_1a; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; +connection node_2; +SELECT LASTVAL(s); +LASTVAL(s) +40 +connection node_1; +SELECT LASTVAL(s); +LASTVAL(s) +19 +connection node_2a; +SELECT LASTVAL(s); +LASTVAL(s) +60 +connection node_1a; +SELECT LASTVAL(s); +LASTVAL(s) +79 +connection node_1; +SELECT * FROM t1; +f1 f2 +1 1 +3 1 +5 1 +7 1 +9 1 +11 1 +13 1 +15 1 +17 1 +19 1 +22 1 +24 1 +26 1 +28 1 +30 1 +32 1 +34 1 +36 1 +38 1 +40 1 +42 1 +44 1 +46 1 +48 1 +50 1 +52 1 +54 1 +56 1 +58 1 +60 1 +61 1 +63 1 +65 1 +67 1 +69 1 +71 1 +73 1 +75 1 +77 1 +79 1 +connection node_2; +SELECT * FROM t1; +f1 f2 +1 1 +3 1 +5 1 +7 1 +9 1 +11 1 +13 1 +15 1 +17 1 +19 1 +22 1 +24 1 +26 1 +28 1 +30 1 +32 1 +34 1 +36 1 +38 1 +40 1 +42 1 +44 1 +46 1 +48 1 +50 1 +52 1 +54 1 +56 1 +58 1 +60 1 +61 1 +63 1 +65 1 +67 1 +69 1 +71 1 +73 1 +75 1 +77 1 +79 1 +connection node_1; +DROP TABLE t1; +DROP SEQUENCE s; +connection node_1; +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY DEFAULT NEXTVAL(s), f2 INT) ENGINE=InnoDB; +connection node_1; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; +connection node_2; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; +connection node_2a; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; +connection node_1a; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); 
+INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; +connection node_2; +SELECT LASTVAL(s); +LASTVAL(s) +20 +connection node_1; +SELECT LASTVAL(s); +LASTVAL(s) +19 +connection node_2a; +SELECT LASTVAL(s); +LASTVAL(s) +40 +connection node_1a; +SELECT LASTVAL(s); +LASTVAL(s) +39 +connection node_1; +SELECT * FROM t1; +f1 f2 +connection node_2; +SELECT * FROM t1; +f1 f2 +connection node_1; +DROP TABLE t1; +DROP SEQUENCE s; +connection node_1; +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY DEFAULT NEXTVAL(s), f2 INT) ENGINE=InnoDB; +connection node_1; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +connection node_1a; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +connection node_2a; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +connection node_2; +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +connection node_1; +COMMIT; +connection node_1a; +ROLLBACK; +connection node_2; +COMMIT; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +connection node_2a; +ROLLBACK; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +connection node_2; +SELECT LASTVAL(s); +LASTVAL(s) +40 +connection node_1; +SELECT LASTVAL(s); +LASTVAL(s) +19 +connection node_2a; +SELECT LASTVAL(s); +LASTVAL(s) +20 +connection node_1a; +SELECT LASTVAL(s); +LASTVAL(s) +39 +connection node_1; +SELECT * FROM t1; +f1 f2 +1 1 +3 1 +5 1 +7 1 +9 1 +11 1 +13 1 +15 1 +17 1 +19 1 +connection node_2; +SELECT * FROM t1; +f1 f2 +1 1 +3 1 +5 1 +7 1 +9 1 +11 1 +13 1 +15 1 +17 1 +19 1 +connection node_1; +DROP TABLE t1; +DROP SEQUENCE s; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_slave_replay.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_slave_replay.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_slave_replay.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_slave_replay.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ -connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; -connection node_2a; connection node_2; connection node_1; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; ALTER TABLE 
mysql.gtid_slave_pos ENGINE=InnoDB; connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; connection node_3; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_split_brain.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_split_brain.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_split_brain.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_split_brain.result 2025-05-19 16:14:24.000000000 +0000 @@ -2,6 +2,7 @@ connection node_1; connection node_1; connection node_2; +connection node_2; call mtr.add_suppression("WSREP: TO isolation failed for: "); connection node_1; call mtr.add_suppression("CREATE TABLE isolation failure"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,8 @@ connection node_2; connection node_1; +SELECT COUNT(*) `expect 0` FROM performance_schema.socket_instances WHERE EVENT_NAME LIKE '%wsrep%'; +expect 0 +0 SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; VARIABLE_VALUE = 'Synced' 1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl_cipher.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_cipher.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl_cipher.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_cipher.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,30 @@ +connection node_2; +connection node_1; +# Correct Galera library found +connection node_1; +connection node_2; +connection node_1; +connection node_2; +SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; +VARIABLE_VALUE = 'Synced' +1 +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +VARIABLE_VALUE = 2 +1 +connection node_1; +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +VARIABLE_VALUE = 2 +1 +connection node_2; +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +VARIABLE_VALUE = 2 +1 +connection node_1; +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +VARIABLE_VALUE = 2 +1 +connection node_2; +connection node_1; +call mtr.add_suppression("WSREP: write_handler\\(\\)"); +connection node_2; +call mtr.add_suppression("WSREP: write_handler\\(\\)"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl_compression.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_compression.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl_compression.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_compression.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,8 @@ connection node_2; connection node_1; +SELECT COUNT(*) `expect 0` FROM performance_schema.socket_instances WHERE EVENT_NAME LIKE '%wsrep%'; +expect 0 +0 SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; VARIABLE_VALUE = 'Synced' 1 diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl_upgrade.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_upgrade.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_ssl_upgrade.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_ssl_upgrade.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,8 @@ connection node_2; connection node_1; +SELECT COUNT(*) `expect 0` FROM performance_schema.socket_instances WHERE EVENT_NAME LIKE '%wsrep%'; +expect 0 +0 connection node_1; connection node_2; connection node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ ---- galera/r/galera_sst_mariabackup.result 2024-04-11 09:53:12.950512316 +0300 -+++ galera/r/galera_sst_mariabackup,debug.reject 2024-04-11 10:00:36.771144955 +0300 -@@ -524,6 +524,190 @@ +--- r/galera_sst_mariabackup.result ++++ r/galera_sst_mariabackup,debug.reject +@@ -516,5 +516,189 @@ 1 DROP TABLE t1; COMMIT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_force_recovery,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_force_recovery,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_force_recovery,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_force_recovery,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ ---- r/galera_sst_mariabackup.result -+++ r/galera_sst_mariabackup,debug.reject +--- r/galera_sst_mariabackup_force_recovery.result ++++ r/galera_sst_mariabackup_force_recovery,debug.reject @@ -516,5 +516,189 @@ 1 DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid,debug.rdiff 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,210 @@ +--- r/galera_sst_mariabackup_gtid.result ++++ r/galera_sst_mariabackup_gtid,debug.reject +@@ -516,19 +516,203 @@ + 1 + DROP TABLE t1; + COMMIT; ++Performing State Transfer on a server that has been killed and restarted ++while a DDL was in progress on it ++connection node_1; ++CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 VALUES (1,'node1_committed_before'); ++INSERT INTO t1 VALUES (2,'node1_committed_before'); ++INSERT INTO t1 VALUES (3,'node1_committed_before'); ++INSERT INTO t1 VALUES (4,'node1_committed_before'); ++INSERT INTO t1 VALUES (5,'node1_committed_before'); ++connection node_2; ++START TRANSACTION; ++INSERT INTO t1 VALUES (6,'node2_committed_before'); ++INSERT INTO t1 VALUES (7,'node2_committed_before'); ++INSERT INTO t1 VALUES (8,'node2_committed_before'); ++INSERT INTO t1 VALUES (9,'node2_committed_before'); ++INSERT INTO t1 VALUES (10,'node2_committed_before'); ++COMMIT; ++SET GLOBAL debug_dbug = 'd,sync.alter_opened_table'; ++connection node_1; ++ALTER TABLE t1 ADD 
COLUMN f2 INTEGER; ++connection node_2; ++SET wsrep_sync_wait = 0; ++Killing server ... ++connection node_1; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (11,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (12,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (13,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (14,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (15,'node1_committed_during'); ++COMMIT; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (16,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (17,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (18,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (19,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (20,'node1_to_be_committed_after'); ++connect node_1a_galera_st_kill_slave_ddl, 127.0.0.1, root, , test, $NODE_MYPORT_1; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (21,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (22,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (23,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (24,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (25,'node1_to_be_rollbacked_after'); ++connection node_2; ++Performing --wsrep-recover ... ++connection node_2; ++Starting server ... ++Using --wsrep-start-position when starting mysqld ... ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (26,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (27,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (28,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (29,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (30,'node2_committed_after'); ++COMMIT; ++connection node_1; ++INSERT INTO t1 (id,f1) VALUES (31,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (32,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (33,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (34,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (35,'node1_to_be_committed_after'); ++COMMIT; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (36,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (37,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (38,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (39,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (40,'node1_committed_after'); ++COMMIT; ++connection node_1a_galera_st_kill_slave_ddl; ++INSERT INTO t1 (id,f1) VALUES (41,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (42,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (43,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (44,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (45,'node1_to_be_rollbacked_after'); ++ROLLBACK; ++SET AUTOCOMMIT=ON; ++SET SESSION wsrep_sync_wait=15; ++SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++EXPECT_3 ++3 ++SELECT COUNT(*) AS EXPECT_35 FROM t1; ++EXPECT_35 ++35 ++SELECT * FROM t1; ++id f1 f2 ++1 node1_committed_before NULL ++2 node1_committed_before NULL ++3 node1_committed_before NULL ++4 node1_committed_before NULL ++5 node1_committed_before NULL ++6 node2_committed_before NULL ++7 node2_committed_before NULL ++8 node2_committed_before NULL ++9 node2_committed_before NULL ++10 node2_committed_before NULL ++11 
node1_committed_during NULL ++12 node1_committed_during NULL ++13 node1_committed_during NULL ++14 node1_committed_during NULL ++15 node1_committed_during NULL ++16 node1_to_be_committed_after NULL ++17 node1_to_be_committed_after NULL ++18 node1_to_be_committed_after NULL ++19 node1_to_be_committed_after NULL ++20 node1_to_be_committed_after NULL ++26 node2_committed_after NULL ++27 node2_committed_after NULL ++28 node2_committed_after NULL ++29 node2_committed_after NULL ++30 node2_committed_after NULL ++31 node1_to_be_committed_after NULL ++32 node1_to_be_committed_after NULL ++33 node1_to_be_committed_after NULL ++34 node1_to_be_committed_after NULL ++35 node1_to_be_committed_after NULL ++36 node1_committed_after NULL ++37 node1_committed_after NULL ++38 node1_committed_after NULL ++39 node1_committed_after NULL ++40 node1_committed_after NULL ++SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; ++COUNT(*) = 0 ++1 ++COMMIT; ++connection node_1; ++SET AUTOCOMMIT=ON; ++SET SESSION wsrep_sync_wait=15; ++SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++EXPECT_3 ++3 ++SELECT COUNT(*) AS EXPECT_35 FROM t1; ++EXPECT_35 ++35 ++SELECT * FROM t1; ++id f1 f2 ++1 node1_committed_before NULL ++2 node1_committed_before NULL ++3 node1_committed_before NULL ++4 node1_committed_before NULL ++5 node1_committed_before NULL ++6 node2_committed_before NULL ++7 node2_committed_before NULL ++8 node2_committed_before NULL ++9 node2_committed_before NULL ++10 node2_committed_before NULL ++11 node1_committed_during NULL ++12 node1_committed_during NULL ++13 node1_committed_during NULL ++14 node1_committed_during NULL ++15 node1_committed_during NULL ++16 node1_to_be_committed_after NULL ++17 node1_to_be_committed_after NULL ++18 node1_to_be_committed_after NULL ++19 node1_to_be_committed_after NULL ++20 node1_to_be_committed_after NULL ++26 node2_committed_after NULL ++27 node2_committed_after NULL ++28 node2_committed_after NULL ++29 node2_committed_after NULL ++30 node2_committed_after NULL ++31 node1_to_be_committed_after NULL ++32 node1_to_be_committed_after NULL ++33 node1_to_be_committed_after NULL ++34 node1_to_be_committed_after NULL ++35 node1_to_be_committed_after NULL ++36 node1_committed_after NULL ++37 node1_committed_after NULL ++38 node1_committed_after NULL ++39 node1_committed_after NULL ++40 node1_committed_after NULL ++SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; ++COUNT(*) = 0 ++1 ++DROP TABLE t1; ++COMMIT; ++SET GLOBAL debug_dbug = $debug_orig; + connection node_1; + # Node_1 + SHOW global variables like 'gtid%pos'; + Variable_name Value +-gtid_binlog_pos 100-10-24 +-gtid_current_pos 100-10-24 ++gtid_binlog_pos 100-10-33 ++gtid_current_pos 100-10-33 + gtid_slave_pos + connection node_2; + # Node_2 + SHOW global variables like 'gtid%pos'; + Variable_name Value +-gtid_binlog_pos 100-10-24 +-gtid_current_pos 100-10-24 ++gtid_binlog_pos 100-10-33 ++gtid_current_pos 100-10-33 + gtid_slave_pos + disconnect node_2; + disconnect node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_gtid.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,534 @@ 
+connection node_2; +connection node_1; +connection node_1; +connection node_2; +Performing State Transfer on a server that has been shut down cleanly and restarted +connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node1_committed_before'); +INSERT INTO t1 VALUES (2,'node1_committed_before'); +INSERT INTO t1 VALUES (3,'node1_committed_before'); +INSERT INTO t1 VALUES (4,'node1_committed_before'); +INSERT INTO t1 VALUES (5,'node1_committed_before'); +COMMIT; +connection node_2; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (6,'node2_committed_before'); +INSERT INTO t1 VALUES (7,'node2_committed_before'); +INSERT INTO t1 VALUES (8,'node2_committed_before'); +INSERT INTO t1 VALUES (9,'node2_committed_before'); +INSERT INTO t1 VALUES (10,'node2_committed_before'); +COMMIT; +Shutting down server ... +connection node_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (11,'node1_committed_during'); +INSERT INTO t1 VALUES (12,'node1_committed_during'); +INSERT INTO t1 VALUES (13,'node1_committed_during'); +INSERT INTO t1 VALUES (14,'node1_committed_during'); +INSERT INTO t1 VALUES (15,'node1_committed_during'); +COMMIT; +START TRANSACTION; +INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +connect node_1a_galera_st_shutdown_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +connection node_2; +Starting server ... 
+SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (26,'node2_committed_after'); +INSERT INTO t1 VALUES (27,'node2_committed_after'); +INSERT INTO t1 VALUES (28,'node2_committed_after'); +INSERT INTO t1 VALUES (29,'node2_committed_after'); +INSERT INTO t1 VALUES (30,'node2_committed_after'); +COMMIT; +connection node_1; +INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (36,'node1_committed_after'); +INSERT INTO t1 VALUES (37,'node1_committed_after'); +INSERT INTO t1 VALUES (38,'node1_committed_after'); +INSERT INTO t1 VALUES (39,'node1_committed_after'); +INSERT INTO t1 VALUES (40,'node1_committed_after'); +COMMIT; +connection node_1a_galera_st_shutdown_slave; +INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (44,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +ROLLBACK; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_15 FROM t1; +EXPECT_15 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; +connection node_1; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_15 FROM t1; +EXPECT_15 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 
node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; +Performing State Transfer on a server that starts from a clean var directory +This is accomplished by shutting down node #2 and removing its var directory before restarting it +connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node1_committed_before'); +INSERT INTO t1 VALUES (2,'node1_committed_before'); +INSERT INTO t1 VALUES (3,'node1_committed_before'); +INSERT INTO t1 VALUES (4,'node1_committed_before'); +INSERT INTO t1 VALUES (5,'node1_committed_before'); +COMMIT; +connection node_2; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (6,'node2_committed_before'); +INSERT INTO t1 VALUES (7,'node2_committed_before'); +INSERT INTO t1 VALUES (8,'node2_committed_before'); +INSERT INTO t1 VALUES (9,'node2_committed_before'); +INSERT INTO t1 VALUES (10,'node2_committed_before'); +COMMIT; +Shutting down server ... +connection node_1; +Cleaning var directory ... +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (11,'node1_committed_during'); +INSERT INTO t1 VALUES (12,'node1_committed_during'); +INSERT INTO t1 VALUES (13,'node1_committed_during'); +INSERT INTO t1 VALUES (14,'node1_committed_during'); +INSERT INTO t1 VALUES (15,'node1_committed_during'); +COMMIT; +START TRANSACTION; +INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +connect node_1a_galera_st_clean_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +connection node_2; +Starting server ... 
+SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (26,'node2_committed_after'); +INSERT INTO t1 VALUES (27,'node2_committed_after'); +INSERT INTO t1 VALUES (28,'node2_committed_after'); +INSERT INTO t1 VALUES (29,'node2_committed_after'); +INSERT INTO t1 VALUES (30,'node2_committed_after'); +COMMIT; +connection node_1; +INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (36,'node1_committed_after'); +INSERT INTO t1 VALUES (37,'node1_committed_after'); +INSERT INTO t1 VALUES (38,'node1_committed_after'); +INSERT INTO t1 VALUES (39,'node1_committed_after'); +INSERT INTO t1 VALUES (40,'node1_committed_after'); +COMMIT; +connection node_1a_galera_st_clean_slave; +INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (44,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +ROLLBACK; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; +connection node_1; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 
node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; +Performing State Transfer on a server that has been killed and restarted +connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node1_committed_before'); +INSERT INTO t1 VALUES (2,'node1_committed_before'); +INSERT INTO t1 VALUES (3,'node1_committed_before'); +INSERT INTO t1 VALUES (4,'node1_committed_before'); +INSERT INTO t1 VALUES (5,'node1_committed_before'); +COMMIT; +connection node_2; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (6,'node2_committed_before'); +INSERT INTO t1 VALUES (7,'node2_committed_before'); +INSERT INTO t1 VALUES (8,'node2_committed_before'); +INSERT INTO t1 VALUES (9,'node2_committed_before'); +INSERT INTO t1 VALUES (10,'node2_committed_before'); +COMMIT; +Killing server ... +connection node_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (11,'node1_committed_during'); +INSERT INTO t1 VALUES (12,'node1_committed_during'); +INSERT INTO t1 VALUES (13,'node1_committed_during'); +INSERT INTO t1 VALUES (14,'node1_committed_during'); +INSERT INTO t1 VALUES (15,'node1_committed_during'); +COMMIT; +START TRANSACTION; +INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +connect node_1a_galera_st_kill_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +connection node_2; +Performing --wsrep-recover ... +Starting server ... +Using --wsrep-start-position when starting mysqld ... 
+SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (26,'node2_committed_after'); +INSERT INTO t1 VALUES (27,'node2_committed_after'); +INSERT INTO t1 VALUES (28,'node2_committed_after'); +INSERT INTO t1 VALUES (29,'node2_committed_after'); +INSERT INTO t1 VALUES (30,'node2_committed_after'); +COMMIT; +connection node_1; +INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (36,'node1_committed_after'); +INSERT INTO t1 VALUES (37,'node1_committed_after'); +INSERT INTO t1 VALUES (38,'node1_committed_after'); +INSERT INTO t1 VALUES (39,'node1_committed_after'); +INSERT INTO t1 VALUES (40,'node1_committed_after'); +COMMIT; +connection node_1a_galera_st_kill_slave; +INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (46,'node1_to_be_rollbacked_after'); +ROLLBACK; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * FROM t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; +connection node_1; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * FROM t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 
node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; +connection node_1; +# Node_1 +SHOW global variables like 'gtid%pos'; +Variable_name Value +gtid_binlog_pos 100-10-24 +gtid_current_pos 100-10-24 +gtid_slave_pos +connection node_2; +# Node_2 +SHOW global variables like 'gtid%pos'; +Variable_name Value +gtid_binlog_pos 100-10-24 +gtid_current_pos 100-10-24 +gtid_slave_pos +disconnect node_2; +disconnect node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_logarchive,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_logarchive,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_logarchive,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_logarchive,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ --- r/galera_sst_mariabackup_logarchive.result -+++ r/galera_sst_mariabackup_logarchive.reject ++++ r/galera_sst_mariabackup_logarchive,debug.reject @@ -516,5 +516,189 @@ 1 DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result 2025-05-19 16:14:24.000000000 +0000 @@ -8,6 +8,6 @@ Cleaning var directory ... connection node_2; Starting server ... 
-include/assert_grep.inc [mariabackup: Using 128974848 bytes for buffer pool \(set by --use-memory parameter\)] +include/assert_grep.inc [mariabackup: Using 134217728 bytes for buffer pool \(set by --use-memory parameter\)] disconnect node_2; disconnect node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,13 +1,12 @@ --- r/galera_sst_mysqldump.result +++ r/galera_sst_mysqldump,debug.reject -@@ -698,11 +698,195 @@ +@@ -698,6 +698,190 @@ 1 DROP TABLE t1; COMMIT; +Performing State Transfer on a server that has been killed and restarted +while a DDL was in progress on it - connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); ++connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; @@ -189,12 +188,6 @@ +DROP TABLE t1; +COMMIT; +SET GLOBAL debug_dbug = $debug_orig; -+connection node_1; -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); + connection node_1; + CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump,release.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump,release.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump,release.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump,release.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ ---- r/galera_sst_mysqldump.result -+++ r/galera_sst_mysqldump.reject -@@ -699,10 +699,10 @@ - DROP TABLE t1; - COMMIT; - connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - CALL 
mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump.result 2025-05-19 16:14:24.000000000 +0000 @@ -699,10 +699,10 @@ DROP TABLE t1; COMMIT; connection node_1; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; connection node_2; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); CALL mtr.add_suppression("Can't open and lock time zone table"); CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,13 +1,12 @@ --- r/galera_sst_mysqldump_with_key.result +++ r/galera_sst_mysqldump_with_key,debug.reject -@@ -358,11 +358,195 @@ +@@ -358,6 +358,190 @@ 1 DROP TABLE t1; COMMIT; +Performing State Transfer on a server that has been killed and restarted +while a DDL was in progress on it - connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); ++connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; @@ -189,12 +188,6 @@ +DROP TABLE t1; +COMMIT; +SET GLOBAL debug_dbug = $debug_orig; -+connection node_1; -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); + connection node_1; + CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time 
zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,release.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,release.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,release.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key,release.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ ---- r/galera_sst_mysqldump_with_key.result -+++ r/galera_sst_mysqldump_with_key.reject -@@ -359,10 +359,10 @@ - DROP TABLE t1; - COMMIT; - connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_mysqldump_with_key.result 2025-05-19 16:14:24.000000000 +0000 @@ -359,10 +359,10 @@ DROP TABLE t1; COMMIT; connection node_1; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; connection node_2; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); CALL mtr.add_suppression("Can't open and lock time zone table"); CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ --- galera_sst_rsync.result -+++ galera_sst_rsync.reject ++++ galera_sst_rsync,debug.reject @@ -516,3 +516,187 @@ 1 DROP TABLE t1; diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync_gtid,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_gtid,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync_gtid,debug.rdiff 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_gtid,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,210 @@ +--- r/galera_sst_rsync_gtid.result ++++ r/galera_sst_rsync_gtid,debug.reject +@@ -516,19 +516,203 @@ + 1 + DROP TABLE t1; + COMMIT; ++Performing State Transfer on a server that has been killed and restarted ++while a DDL was in progress on it ++connection node_1; ++CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 VALUES (1,'node1_committed_before'); ++INSERT INTO t1 VALUES (2,'node1_committed_before'); ++INSERT INTO t1 VALUES (3,'node1_committed_before'); ++INSERT INTO t1 VALUES (4,'node1_committed_before'); ++INSERT INTO t1 VALUES (5,'node1_committed_before'); ++connection node_2; ++START TRANSACTION; ++INSERT INTO t1 VALUES (6,'node2_committed_before'); ++INSERT INTO t1 VALUES (7,'node2_committed_before'); ++INSERT INTO t1 VALUES (8,'node2_committed_before'); ++INSERT INTO t1 VALUES (9,'node2_committed_before'); ++INSERT INTO t1 VALUES (10,'node2_committed_before'); ++COMMIT; ++SET GLOBAL debug_dbug = 'd,sync.alter_opened_table'; ++connection node_1; ++ALTER TABLE t1 ADD COLUMN f2 INTEGER; ++connection node_2; ++SET wsrep_sync_wait = 0; ++Killing server ... ++connection node_1; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (11,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (12,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (13,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (14,'node1_committed_during'); ++INSERT INTO t1 (id,f1) VALUES (15,'node1_committed_during'); ++COMMIT; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (16,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (17,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (18,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (19,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (20,'node1_to_be_committed_after'); ++connect node_1a_galera_st_kill_slave_ddl, 127.0.0.1, root, , test, $NODE_MYPORT_1; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (21,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (22,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (23,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (24,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (25,'node1_to_be_rollbacked_after'); ++connection node_2; ++Performing --wsrep-recover ... ++connection node_2; ++Starting server ... ++Using --wsrep-start-position when starting mysqld ... 
++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (26,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (27,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (28,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (29,'node2_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (30,'node2_committed_after'); ++COMMIT; ++connection node_1; ++INSERT INTO t1 (id,f1) VALUES (31,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (32,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (33,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (34,'node1_to_be_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (35,'node1_to_be_committed_after'); ++COMMIT; ++SET AUTOCOMMIT=OFF; ++START TRANSACTION; ++INSERT INTO t1 (id,f1) VALUES (36,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (37,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (38,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (39,'node1_committed_after'); ++INSERT INTO t1 (id,f1) VALUES (40,'node1_committed_after'); ++COMMIT; ++connection node_1a_galera_st_kill_slave_ddl; ++INSERT INTO t1 (id,f1) VALUES (41,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (42,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (43,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (44,'node1_to_be_rollbacked_after'); ++INSERT INTO t1 (id,f1) VALUES (45,'node1_to_be_rollbacked_after'); ++ROLLBACK; ++SET AUTOCOMMIT=ON; ++SET SESSION wsrep_sync_wait=15; ++SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++EXPECT_3 ++3 ++SELECT COUNT(*) AS EXPECT_35 FROM t1; ++EXPECT_35 ++35 ++SELECT * FROM t1; ++id f1 f2 ++1 node1_committed_before NULL ++2 node1_committed_before NULL ++3 node1_committed_before NULL ++4 node1_committed_before NULL ++5 node1_committed_before NULL ++6 node2_committed_before NULL ++7 node2_committed_before NULL ++8 node2_committed_before NULL ++9 node2_committed_before NULL ++10 node2_committed_before NULL ++11 node1_committed_during NULL ++12 node1_committed_during NULL ++13 node1_committed_during NULL ++14 node1_committed_during NULL ++15 node1_committed_during NULL ++16 node1_to_be_committed_after NULL ++17 node1_to_be_committed_after NULL ++18 node1_to_be_committed_after NULL ++19 node1_to_be_committed_after NULL ++20 node1_to_be_committed_after NULL ++26 node2_committed_after NULL ++27 node2_committed_after NULL ++28 node2_committed_after NULL ++29 node2_committed_after NULL ++30 node2_committed_after NULL ++31 node1_to_be_committed_after NULL ++32 node1_to_be_committed_after NULL ++33 node1_to_be_committed_after NULL ++34 node1_to_be_committed_after NULL ++35 node1_to_be_committed_after NULL ++36 node1_committed_after NULL ++37 node1_committed_after NULL ++38 node1_committed_after NULL ++39 node1_committed_after NULL ++40 node1_committed_after NULL ++SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; ++COUNT(*) = 0 ++1 ++COMMIT; ++connection node_1; ++SET AUTOCOMMIT=ON; ++SET SESSION wsrep_sync_wait=15; ++SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; ++EXPECT_3 ++3 ++SELECT COUNT(*) AS EXPECT_35 FROM t1; ++EXPECT_35 ++35 ++SELECT * FROM t1; ++id f1 f2 ++1 node1_committed_before NULL ++2 node1_committed_before NULL ++3 node1_committed_before NULL ++4 node1_committed_before NULL ++5 node1_committed_before NULL ++6 node2_committed_before NULL ++7 node2_committed_before 
NULL ++8 node2_committed_before NULL ++9 node2_committed_before NULL ++10 node2_committed_before NULL ++11 node1_committed_during NULL ++12 node1_committed_during NULL ++13 node1_committed_during NULL ++14 node1_committed_during NULL ++15 node1_committed_during NULL ++16 node1_to_be_committed_after NULL ++17 node1_to_be_committed_after NULL ++18 node1_to_be_committed_after NULL ++19 node1_to_be_committed_after NULL ++20 node1_to_be_committed_after NULL ++26 node2_committed_after NULL ++27 node2_committed_after NULL ++28 node2_committed_after NULL ++29 node2_committed_after NULL ++30 node2_committed_after NULL ++31 node1_to_be_committed_after NULL ++32 node1_to_be_committed_after NULL ++33 node1_to_be_committed_after NULL ++34 node1_to_be_committed_after NULL ++35 node1_to_be_committed_after NULL ++36 node1_committed_after NULL ++37 node1_committed_after NULL ++38 node1_committed_after NULL ++39 node1_committed_after NULL ++40 node1_committed_after NULL ++SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; ++COUNT(*) = 0 ++1 ++DROP TABLE t1; ++COMMIT; ++SET GLOBAL debug_dbug = $debug_orig; + connection node_1; + # Node_1 + SHOW global variables like 'gtid%pos'; + Variable_name Value +-gtid_binlog_pos 100-10-24 +-gtid_current_pos 100-10-24 ++gtid_binlog_pos 100-10-33 ++gtid_current_pos 100-10-33 + gtid_slave_pos + connection node_2; + # Node_2 + SHOW global variables like 'gtid%pos'; + Variable_name Value +-gtid_binlog_pos 100-10-24 +-gtid_current_pos 100-10-24 ++gtid_binlog_pos 100-10-33 ++gtid_current_pos 100-10-33 + gtid_slave_pos + disconnect node_2; + disconnect node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync_gtid.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_gtid.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync_gtid.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_gtid.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,534 @@ +connection node_2; +connection node_1; +connection node_1; +connection node_2; +Performing State Transfer on a server that has been shut down cleanly and restarted +connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node1_committed_before'); +INSERT INTO t1 VALUES (2,'node1_committed_before'); +INSERT INTO t1 VALUES (3,'node1_committed_before'); +INSERT INTO t1 VALUES (4,'node1_committed_before'); +INSERT INTO t1 VALUES (5,'node1_committed_before'); +COMMIT; +connection node_2; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (6,'node2_committed_before'); +INSERT INTO t1 VALUES (7,'node2_committed_before'); +INSERT INTO t1 VALUES (8,'node2_committed_before'); +INSERT INTO t1 VALUES (9,'node2_committed_before'); +INSERT INTO t1 VALUES (10,'node2_committed_before'); +COMMIT; +Shutting down server ... 
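Every verification block in these result files runs under wsrep_sync_wait=15. As documented for MariaDB, the value is a bitmask of statement classes that must first wait for the node to catch up with the cluster: 1 covers READs, 2 covers UPDATE and DELETE, 4 covers INSERT and REPLACE, and 8 covers SHOW, so 15 enables every check. A short sketch of the effect (the summary of the bit meanings is from the MariaDB documentation, not from this diff):

SET SESSION wsrep_sync_wait = 15;   # 1 (READ) + 2 (UPDATE/DELETE) + 4 (INSERT/REPLACE) + 8 (SHOW)
SELECT COUNT(*) FROM t1;            # now blocks until all earlier cluster writes are applied locally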
+connection node_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (11,'node1_committed_during'); +INSERT INTO t1 VALUES (12,'node1_committed_during'); +INSERT INTO t1 VALUES (13,'node1_committed_during'); +INSERT INTO t1 VALUES (14,'node1_committed_during'); +INSERT INTO t1 VALUES (15,'node1_committed_during'); +COMMIT; +START TRANSACTION; +INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +connect node_1a_galera_st_shutdown_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +connection node_2; +Starting server ... +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (26,'node2_committed_after'); +INSERT INTO t1 VALUES (27,'node2_committed_after'); +INSERT INTO t1 VALUES (28,'node2_committed_after'); +INSERT INTO t1 VALUES (29,'node2_committed_after'); +INSERT INTO t1 VALUES (30,'node2_committed_after'); +COMMIT; +connection node_1; +INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (36,'node1_committed_after'); +INSERT INTO t1 VALUES (37,'node1_committed_after'); +INSERT INTO t1 VALUES (38,'node1_committed_after'); +INSERT INTO t1 VALUES (39,'node1_committed_after'); +INSERT INTO t1 VALUES (40,'node1_committed_after'); +COMMIT; +connection node_1a_galera_st_shutdown_slave; +INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (44,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +ROLLBACK; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_15 FROM t1; +EXPECT_15 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT 
COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; +connection node_1; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_15 FROM t1; +EXPECT_15 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; +Performing State Transfer on a server that starts from a clean var directory +This is accomplished by shutting down node #2 and removing its var directory before restarting it +connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node1_committed_before'); +INSERT INTO t1 VALUES (2,'node1_committed_before'); +INSERT INTO t1 VALUES (3,'node1_committed_before'); +INSERT INTO t1 VALUES (4,'node1_committed_before'); +INSERT INTO t1 VALUES (5,'node1_committed_before'); +COMMIT; +connection node_2; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (6,'node2_committed_before'); +INSERT INTO t1 VALUES (7,'node2_committed_before'); +INSERT INTO t1 VALUES (8,'node2_committed_before'); +INSERT INTO t1 VALUES (9,'node2_committed_before'); +INSERT INTO t1 VALUES (10,'node2_committed_before'); +COMMIT; +Shutting down server ... +connection node_1; +Cleaning var directory ... +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (11,'node1_committed_during'); +INSERT INTO t1 VALUES (12,'node1_committed_during'); +INSERT INTO t1 VALUES (13,'node1_committed_during'); +INSERT INTO t1 VALUES (14,'node1_committed_during'); +INSERT INTO t1 VALUES (15,'node1_committed_during'); +COMMIT; +START TRANSACTION; +INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +connect node_1a_galera_st_clean_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +connection node_2; +Starting server ... 
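The recurring consistency probe in these results deserves a note: each committed marker string is inserted in batches of five, 'node1_to_be_committed_after' in two batches (ids 16-20 and 31-35), and the rolled-back batches must vanish entirely, so every surviving group has to count exactly 5 or 10. The subquery, copied from the output above with comments added, returns 1 only when that holds:

# keep any marker group whose row count is not 5 or 10; an empty result
# (COUNT(*) = 0) proves no rows were lost, duplicated, or leaked from a
# rolled-back transaction across the state transfer
SELECT COUNT(*) = 0
FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1;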
+SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (26,'node2_committed_after'); +INSERT INTO t1 VALUES (27,'node2_committed_after'); +INSERT INTO t1 VALUES (28,'node2_committed_after'); +INSERT INTO t1 VALUES (29,'node2_committed_after'); +INSERT INTO t1 VALUES (30,'node2_committed_after'); +COMMIT; +connection node_1; +INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (36,'node1_committed_after'); +INSERT INTO t1 VALUES (37,'node1_committed_after'); +INSERT INTO t1 VALUES (38,'node1_committed_after'); +INSERT INTO t1 VALUES (39,'node1_committed_after'); +INSERT INTO t1 VALUES (40,'node1_committed_after'); +COMMIT; +connection node_1a_galera_st_clean_slave; +INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (44,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +ROLLBACK; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; +connection node_1; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * from t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 
node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; +Performing State Transfer on a server that has been killed and restarted +connection node_1; +CREATE TABLE t1 (id int not null primary key,f1 CHAR(255)) ENGINE=InnoDB; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (1,'node1_committed_before'); +INSERT INTO t1 VALUES (2,'node1_committed_before'); +INSERT INTO t1 VALUES (3,'node1_committed_before'); +INSERT INTO t1 VALUES (4,'node1_committed_before'); +INSERT INTO t1 VALUES (5,'node1_committed_before'); +COMMIT; +connection node_2; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (6,'node2_committed_before'); +INSERT INTO t1 VALUES (7,'node2_committed_before'); +INSERT INTO t1 VALUES (8,'node2_committed_before'); +INSERT INTO t1 VALUES (9,'node2_committed_before'); +INSERT INTO t1 VALUES (10,'node2_committed_before'); +COMMIT; +Killing server ... +connection node_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (11,'node1_committed_during'); +INSERT INTO t1 VALUES (12,'node1_committed_during'); +INSERT INTO t1 VALUES (13,'node1_committed_during'); +INSERT INTO t1 VALUES (14,'node1_committed_during'); +INSERT INTO t1 VALUES (15,'node1_committed_during'); +COMMIT; +START TRANSACTION; +INSERT INTO t1 VALUES (16,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (17,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (18,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (19,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (20,'node1_to_be_committed_after'); +connect node_1a_galera_st_kill_slave, 127.0.0.1, root, , test, $NODE_MYPORT_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (21,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (22,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (23,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (24,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (25,'node1_to_be_rollbacked_after'); +connection node_2; +Performing --wsrep-recover ... +Starting server ... +Using --wsrep-start-position when starting mysqld ... 
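After an unclean kill the joiner cannot trust its cached Galera state, so the test first runs mysqld with --wsrep-recover, which recovers the last committed cluster position from InnoDB, and then starts the server with --wsrep-start-position set to that <cluster-uuid>:<seqno> pair. A hedged illustration of where that position is visible afterwards (the variable name is real; the value shown is invented for illustration):

SHOW GLOBAL VARIABLES LIKE 'wsrep_start_position';
# Variable_name         Value
# wsrep_start_position  e8c367ac-0e7e-11f0-8b74-000000000000:24   # illustrative <uuid>:<seqno>

The GTID output at the end of this file uses MariaDB's <domain>-<server_id>-<seqno> notation, so 100-10-24 means domain 100, server id 10, sequence 24; the ,debug.rdiff variant above ends at 100-10-33 because the extra kill-during-DDL scenario replicates nine more binlogged events (33 - 24 = 9).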
+SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (26,'node2_committed_after'); +INSERT INTO t1 VALUES (27,'node2_committed_after'); +INSERT INTO t1 VALUES (28,'node2_committed_after'); +INSERT INTO t1 VALUES (29,'node2_committed_after'); +INSERT INTO t1 VALUES (30,'node2_committed_after'); +COMMIT; +connection node_1; +INSERT INTO t1 VALUES (31,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (32,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (33,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (34,'node1_to_be_committed_after'); +INSERT INTO t1 VALUES (35,'node1_to_be_committed_after'); +COMMIT; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (36,'node1_committed_after'); +INSERT INTO t1 VALUES (37,'node1_committed_after'); +INSERT INTO t1 VALUES (38,'node1_committed_after'); +INSERT INTO t1 VALUES (39,'node1_committed_after'); +INSERT INTO t1 VALUES (40,'node1_committed_after'); +COMMIT; +connection node_1a_galera_st_kill_slave; +INSERT INTO t1 VALUES (41,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (42,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (43,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (45,'node1_to_be_rollbacked_after'); +INSERT INTO t1 VALUES (46,'node1_to_be_rollbacked_after'); +ROLLBACK; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * FROM t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +COMMIT; +connection node_1; +SET AUTOCOMMIT=ON; +SET SESSION wsrep_sync_wait=15; +SELECT COUNT(*) AS EXPECT_35 FROM t1; +EXPECT_35 +35 +SELECT * FROM t1; +id f1 +1 node1_committed_before +2 node1_committed_before +3 node1_committed_before +4 node1_committed_before +5 node1_committed_before +6 node2_committed_before +7 node2_committed_before +8 node2_committed_before +9 node2_committed_before +10 node2_committed_before +11 node1_committed_during +12 node1_committed_during +13 node1_committed_during +14 node1_committed_during +15 node1_committed_during +16 node1_to_be_committed_after +17 node1_to_be_committed_after +18 node1_to_be_committed_after +19 node1_to_be_committed_after +20 node1_to_be_committed_after +26 node2_committed_after +27 node2_committed_after +28 node2_committed_after +29 node2_committed_after +30 node2_committed_after +31 node1_to_be_committed_after +32 node1_to_be_committed_after +33 node1_to_be_committed_after +34 node1_to_be_committed_after +35 node1_to_be_committed_after +36 
node1_committed_after +37 node1_committed_after +38 node1_committed_after +39 node1_committed_after +40 node1_committed_after +SELECT COUNT(*) = 0 FROM (SELECT COUNT(*) AS c, f1 FROM t1 GROUP BY f1 HAVING c NOT IN (5, 10)) AS a1; +COUNT(*) = 0 +1 +DROP TABLE t1; +COMMIT; +connection node_1; +# Node_1 +SHOW global variables like 'gtid%pos'; +Variable_name Value +gtid_binlog_pos 100-10-24 +gtid_current_pos 100-10-24 +gtid_slave_pos +connection node_2; +# Node_2 +SHOW global variables like 'gtid%pos'; +Variable_name Value +gtid_binlog_pos 100-10-24 +gtid_current_pos 100-10-24 +gtid_slave_pos +disconnect node_2; +disconnect node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync_recv_auto,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_recv_auto,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_sst_rsync_recv_auto,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_sst_rsync_recv_auto,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,3 +1,5 @@ +--- r/galera_sst_rsync_recv_auto.result ++++ r/galera_sst_rsync_recv_auto,debug.reject @@ -516,3 +516,187 @@ 1 DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_strict_require_innodb.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_strict_require_innodb.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_strict_require_innodb.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_strict_require_innodb.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ connection node_2; connection node_1; -call mtr.add_suppression("WSREP: wsrep_mode = STRICT_REPLICATION enabled. Storage engine .*"); +call mtr.add_suppression("WSREP: wsrep_mode = STRICT_REPLICATION enabled\\. Storage engine "); CREATE TABLE t1(a int NOT NULL PRIMARY KEY, b varchar(50)) ENGINE=INNODB; CREATE TABLE t2(a int NOT NULL PRIMARY KEY, b varchar(50)) ENGINE=MYISAM; CREATE TABLE t3(a int NOT NULL PRIMARY KEY, b varchar(50)) ENGINE=ARIA; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_strict_require_primary_key.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_strict_require_primary_key.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_strict_require_primary_key.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_strict_require_primary_key.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ connection node_2; connection node_1; -call mtr.add_suppression("WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled. Table .*"); +call mtr.add_suppression("WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled\\. 
Table "); CREATE TABLE t1(a int, b varchar(50)) ENGINE=INNODB; CREATE TABLE t2(a int, b varchar(50)) ENGINE=MYISAM; CREATE TABLE t3(a int, b varchar(50)) ENGINE=MEMORY; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_toi_ddl_nonconflicting.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_toi_ddl_nonconflicting.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_toi_ddl_nonconflicting.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_toi_ddl_nonconflicting.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,29 +1,69 @@ connection node_2; connection node_1; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY AUTO_INCREMENT, f2 INTEGER); +INSERT INTO t1(f2) SELECT seq FROM seq_1_to_1000; +connection node_2a; +SET SESSION wsrep_sync_wait=0; +connection node_1a; +# Block the applier on node_1 and issue a ddl from node_2 +SET SESSION wsrep_sync_wait=0; +SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync'; connection node_2; -ALTER TABLE t1 ADD COLUMN f3 INTEGER; INSERT INTO t1 (f1, f2) VALUES (DEFAULT, 123);; +# DDL 1 +ALTER TABLE t1 ADD COLUMN f3 INTEGER; INSERT INTO t1 VALUES (NULL, 10000, 10000);; +connection node_1a; +SET SESSION wsrep_on = 0; +SET SESSION wsrep_on = 1; +SET GLOBAL wsrep_provider_options = 'dbug='; +# This will block on acquiring total order isolation connection node_1; +# DDL 2 CREATE UNIQUE INDEX i1 ON t1(f2);; +connection node_1a; +# Signal DDL 1 +SET GLOBAL wsrep_provider_options = 'dbug='; +SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync'; +connection node_2; +connection node_1; connection node_2; -INSERT INTO t1 (f1, f2) VALUES (DEFAULT, 234); -SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -COUNT(*) = 3 -1 -SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; -COUNT(*) = 2 -1 -SELECT COUNT(*) = 2 FROM t1; -COUNT(*) = 2 -1 +SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; +EXPECT_3 +3 +SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; +EXPECT_2 +2 +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `f1` int(11) NOT NULL AUTO_INCREMENT, + `f2` int(11) DEFAULT NULL, + `f3` int(11) DEFAULT NULL, + PRIMARY KEY (`f1`), + UNIQUE KEY `i1` (`f2`) +) ENGINE=InnoDB AUTO_INCREMENT=2002 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +SELECT COUNT(*) AS EXPECT_1001 FROM t1; +EXPECT_1001 +1001 connection node_1; -SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -COUNT(*) = 3 -1 -SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; -COUNT(*) = 2 -1 -SELECT COUNT(*) = 2 FROM t1; -COUNT(*) = 2 -1 +SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; +EXPECT_3 +3 +SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; +EXPECT_2 +2 +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `f1` int(11) NOT NULL AUTO_INCREMENT, + `f2` int(11) DEFAULT NULL, + `f3` int(11) DEFAULT NULL, + PRIMARY KEY (`f1`), + UNIQUE KEY `i1` (`f2`) +) ENGINE=InnoDB AUTO_INCREMENT=2047 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +SELECT COUNT(*) AS EXPECT_1001 FROM t1; +EXPECT_1001 +1001 DROP TABLE t1; diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/r/galera_var_replicate_myisam_on.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_var_replicate_myisam_on.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_var_replicate_myisam_on.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_var_replicate_myisam_on.result 2025-05-19 16:14:24.000000000 +0000 @@ -52,8 +52,8 @@ 0 DROP TABLE t1; connection node_1; -CREATE TABLE t1 (f1 INTEGER) ENGINE=MyISAM; -CREATE TABLE t2 (f1 INTEGER) ENGINE=InnoDB; +CREATE TABLE t1 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=MyISAM; +CREATE TABLE t2 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES (1); @@ -203,6 +203,9 @@ 3 200 4 5 connection node_2; +SELECT COUNT(*) FROM t1; +COUNT(*) +10 SELECT * FROM t1 ORDER BY id; id b 1 1 @@ -224,15 +227,29 @@ DROP TRIGGER tr1; DROP TRIGGER tr2; DROP TRIGGER tr3; -DROP TABLE t1,t2; +DROP TABLE t1, t2; +CREATE TABLE t1 (a INT, b INT, UNIQUE(a)) ENGINE=MyISAM; +CREATE TRIGGER tr1 BEFORE INSERT ON t1 FOR EACH ROW SET NEW.a=1; +INSERT INTO t1 (a,b) VALUES (10,20); +SELECT * from t1; +a b +1 20 +connection node_2; +SELECT * from t1; +a b +1 20 +connection node_1; +DROP TABLE t1; # # MDEV-11152: wsrep_replicate_myisam: SELECT gets replicated using TO # connection node_1; -CREATE TABLE t1 (i INT) ENGINE=INNODB; +CREATE TABLE t1 (i INT NOT NULL PRIMARY KEY) ENGINE=INNODB; INSERT INTO t1 VALUES(1); SELECT * FROM t1; i 1 DROP TABLE t1; -connection node_1; +SET GLOBAL wsrep_mode = DEFAULT; +connection node_2; +SET GLOBAL wsrep_mode = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_var_slave_threads.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_var_slave_threads.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_var_slave_threads.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_var_slave_threads.result 2025-05-19 16:14:24.000000000 +0000 @@ -33,7 +33,6 @@ SELECT COUNT(*) FROM t2; COUNT(*) 70 -SET GLOBAL wsrep_slave_threads = 1; DROP TABLE t1; DROP TABLE t2; # diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_vote_during_ist.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_during_ist.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_vote_during_ist.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_during_ist.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,112 @@ +connection node_4; +connection node_3; +connection node_2; +connection node_1; +# Correct Galera library found +connection node_1; +connection node_2; +connection node_3; +connection node_4; +connection node_1; +CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY); +CREATE PROCEDURE p1(IN max INT) +BEGIN +DECLARE i INT; +DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; +SET i = 0; +WHILE i < max DO +INSERT IGNORE INTO t1 VALUES (DEFAULT); +SET i = i + 1; +END WHILE; +END| +CALL p1(130); +connection node_4; +Shutting down server 4... 
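The helper procedure p1 above is the workload generator for these voting tests: it inserts max auto-increment rows one at a time, and its empty CONTINUE HANDLER silently swallows any SQL error, for example an insert that fails while the cluster reconfigures, so the loop survives membership changes. The row counts asserted later follow directly from the calls:

CALL p1(130);               # each call adds 130 rows via INSERT IGNORE
SELECT COUNT(*) FROM t1;    # three calls in this test, hence the expect*_390 checks (3 * 130 = 390)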
+connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_2; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_3; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +Server 4 left the cluster +connection node_1; +CALL p1(130); +connection node_1; +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; +connection node_2; +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; +connection node_3; +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; +INSERT INTO t2 VALUES (DEFAULT); +CALL p1(130); +connection node_1; +SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation"; +Restarting server 4 +Wait for server 1 to become a donor +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached"; +Server 1 got SST request from server 4 +SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue"; +SET GLOBAL debug = ""; +SET DEBUG_SYNC='RESET'; +Waiting for server 4 to leave the cluster +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_2; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_3; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_4; +Server 4 left the cluster, killing it... +Killed server 4... +Restarting server 4... +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_1; +SELECT count(*) AS expect1_390 FROM t1; +expect1_390 +390 +SELECT count(*) AS expect1_1 FROM t2; +expect1_1 +1 +connection node_2; +SELECT count(*) AS expect2_390 FROM t1; +expect2_390 +390 +SELECT count(*) AS expect2_1 FROM t2; +expect2_1 +1 +connection node_3; +SELECT count(*) AS expect3_390 FROM t1; +expect3_390 +390 +SELECT count(*) AS expect3_1 FROM t2; +expect3_1 +1 +connection node_4; +SELECT count(*) AS expect4_390 FROM t1; +expect4_390 +390 +SELECT count(*) AS expect4_1 FROM t2; +expect4_1 +1 +DROP TABLE t1; +DROP TABLE t2; +DROP PROCEDURE p1; +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); +CALL mtr.add_suppression("Inconsistency detected: Failed on preordered"); +CALL mtr.add_suppression("Failed to apply write set"); +CALL mtr.add_suppression("Sending JOIN failed: -103"); +CALL mtr.add_suppression("Failed to JOIN the cluster after SST"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_vote_joined_apply.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_joined_apply.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_vote_joined_apply.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_joined_apply.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,94 @@ +connection node_4; +connection node_3; +connection node_2; +connection node_1; +# Correct Galera library found +connection node_1; +connection node_2; +connection node_3; +connection node_4; +connection node_1; +CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY); +CREATE PROCEDURE p1(IN max INT) +BEGIN +DECLARE i INT; +DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; +SET i = 0; +WHILE i < max DO +INSERT IGNORE INTO t1 VALUES (DEFAULT); +SET i = i + 1; +END WHILE; +END| +CALL p1(130); +connection node_4; +Shutting down server 
4... +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +SET GLOBAL debug = "+d,sync.wsrep_donor_state"; +connection node_4; +Restarting server 4... +connection node_1; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached"; +Tables on server 1 flushed and locked for SST to server 4 +SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state"; +SET GLOBAL debug = ""; +SET DEBUG_SYNC='RESET'; +Wait for the state snapshot to be copied to server 4 +SST script unlocked server 1 +connection node_1; +CALL p1(130); +connection node_1; +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; +connection node_2; +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; +connection node_3; +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; +INSERT INTO t2 VALUES (DEFAULT); +CALL p1(130); +Waiting for server 4 to leave the cluster +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_2; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_4; +Server 4 left the cluster, killing it... +Killed server 4... +Restarting server 4... +DROP TABLE t2; +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_1; +SELECT count(*) AS expect1_390 FROM t1; +expect1_390 +390 +connection node_2; +SELECT count(*) AS expect2_390 FROM t1; +expect2_390 +390 +connection node_3; +SELECT count(*) AS expect3_390 FROM t1; +expect3_390 +390 +connection node_4; +SELECT count(*) AS expect4_390 FROM t1; +expect4_390 +390 +DROP TABLE t1; +DROP PROCEDURE p1; +connection node_4; +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); +CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus"); +CALL mtr.add_suppression("Failed to apply write set: gtid:"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_vote_joined_skip.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_joined_skip.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_vote_joined_skip.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_vote_joined_skip.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,102 @@ +connection node_4; +connection node_3; +connection node_2; +connection node_1; +# Correct Galera library found +connection node_1; +connection node_2; +connection node_3; +connection node_4; +connection node_1; +CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY); +CREATE PROCEDURE p1(IN max INT) +BEGIN +DECLARE i INT; +DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; +SET i = 0; +WHILE i < max DO +INSERT IGNORE INTO t1 VALUES (DEFAULT); +SET i = i + 1; +END WHILE; +END| +CALL p1(130); +connection node_4; +Shutting down server 4... +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +SET GLOBAL debug = "+d,sync.wsrep_donor_state"; +connection node_4; +Restarting server 4... 
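These joined-vote tests coordinate with the donor through DEBUG_SYNC rather than sleeps: the donor arms a named point, the controlling connection waits for the matching *_reached state, performs its interference, and then signals the donor to continue. The sequence, condensed from the statements in these results (debug builds only):

SET GLOBAL debug = "+d,sync.wsrep_donor_state";   # arm the sync point on the donor
# restarting the joiner triggers an SST; the donor parks at the point
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached";
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state";   # release the donor
SET GLOBAL debug = "";                            # disarm
SET DEBUG_SYNC = 'RESET';                         # clear leftover signals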
+connection node_1; +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached"; +Tables on server 1 flushed and locked for SST to server 4 +SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state"; +SET GLOBAL debug = ""; +SET DEBUG_SYNC='RESET'; +Wait for the state snapshot to be copied to server 4 +SST script unlocked server 1 +connection node_1; +CALL p1(130); +connection node_3; +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; +INSERT INTO t2 VALUES (DEFAULT); +SET SESSION wsrep_on = OFF; +connection node_1; +CALL p1(130); +Waiting for server 3 to leave the cluster +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_2; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_4; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_3; +Server 3 left the cluster, killing it... +Killed server 3. +Restarting server 3... +Waiting for server 3 to rejoin the cluster +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_3; +sleeping for 20 +Waiting ready +Server 3 restarted. +connection node_1; +SET SESSION wsrep_on = ON; +SET SESSION wsrep_sync_wait = 15; +connection node_1; +SELECT count(*) AS expect1_390 FROM t1; +expect1_390 +390 +connection node_2; +SELECT count(*) AS expect2_390 FROM t1; +expect2_390 +390 +connection node_3; +SELECT count(*) AS expect3_390 FROM t1; +expect3_390 +390 +connection node_4; +SELECT count(*) AS expect4_390 FROM t1; +expect4_390 +390 +DROP TABLE t1; +DROP PROCEDURE p1; +connection node_1; +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); +connection node_2; +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); +connection node_3; +CALL mtr.add_suppression("Vote 0 \\(success\\) on .+ is inconsistent with group"); +connection node_4; +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_wan.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_wan.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_wan.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_wan.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,9 +1,9 @@ connection node_2; connection node_1; -CALL mtr.add_suppression("WSREP: Stray state UUID msg:"); -CALL mtr.add_suppression("Sending JOIN failed: "); -CALL mtr.add_suppression("WSREP: .* sending install message failed: Socket is not connected"); -CALL mtr.add_suppression("There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside"); +CALL mtr.add_suppression("WSREP: Stray state UUID msg: "); +CALL mtr.add_suppression("WSREP: .*Sending JOIN failed: "); +CALL mtr.add_suppression("WSREP: .*sending install message failed: (Transport endpoint|Socket) is not connected"); +CALL mtr.add_suppression("WSREP: .*There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside"); SELECT VARIABLE_VALUE = 4 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; 
VARIABLE_VALUE = 4 1 @@ -36,8 +36,8 @@ 1 DROP TABLE t1; connection node_1; -call mtr.add_suppression("WSREP: read_completion_condition.*"); -call mtr.add_suppression("WSREP: read_handler.*"); +call mtr.add_suppression("WSREP: read_completion_condition"); +call mtr.add_suppression("WSREP: read_handler"); disconnect node_3; disconnect node_4; disconnect node_2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_wsrep_provider_options_syntax.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_wsrep_provider_options_syntax.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_wsrep_provider_options_syntax.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_wsrep_provider_options_syntax.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ connection node_2; connection node_1; -call mtr.add_suppression("WSREP\: Unknown parameter 'gmcasts\\.segment'"); -call mtr.add_suppression("WSREP\: Set options returned 7"); +call mtr.add_suppression("WSREP: Unknown parameter 'gmcasts\\.segment'"); +call mtr.add_suppression("WSREP: Set options returned 7"); SET GLOBAL wsrep_provider_options="gmcasts.segment=1"; ERROR HY000: Incorrect arguments to SET Unhandled exceptions: 0 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result mariadb-10.11.13/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result --- mariadb-10.11.11/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/galera_wsrep_schema_detached.result 2025-05-19 16:14:24.000000000 +0000 @@ -3,10 +3,17 @@ connection node_1; connection node_2; connection node_1; -call mtr.add_suppression("WSREP:.*"); +call mtr.add_suppression("WSREP: async IST sender failed to serve"); +call mtr.add_suppression("WSREP: Failed to establish connection: Connection refused"); +call mtr.add_suppression("WSREP: IST failed: IST sender, failed to connect"); +call mtr.add_suppression("WSREP: .*State transfer.* failed: Protocol error"); SET @wsrep_provider_options_orig = @@GLOBAL.wsrep_provider_options; SET GLOBAL wsrep_provider_options ='pc.ignore_sb=true;pc.weight=2'; connection node_2; +call mtr.add_suppression("WSREP: async IST sender failed to serve"); +call mtr.add_suppression("WSREP: Failed to establish connection: Connection refused"); +call mtr.add_suppression("WSREP: IST failed: IST sender, failed to connect"); +call mtr.add_suppression("WSREP: .*State transfer.* failed: Protocol error"); SET @wsrep_cluster_address_orig = @@GLOBAL.wsrep_cluster_address; SET GLOBAL WSREP_ON=0; SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/mdev-29775.result mariadb-10.11.13/mysql-test/suite/galera/r/mdev-29775.result --- mariadb-10.11.11/mysql-test/suite/galera/r/mdev-29775.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/mdev-29775.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,84 @@ +connection node_2; +connection node_1; +SET GLOBAL wsrep_mode=REPLICATE_MYISAM; +CREATE TABLE t (f0 CHAR(0)) ENGINE=MyISAM; +INSERT INTO t VALUES(); +SELECT * FROM t; +f0 +NULL +connection node_2; +SELECT * FROM t; +f0 +NULL +DROP TABLE t; +connection node_1; +SET GLOBAL wsrep_mode=REPLICATE_MYISAM; +SET GLOBAL wsrep_forced_binlog_format=ROW; +CREATE TABLE t (f0 CHAR(0)) ENGINE=MyISAM; +INSERT INTO t VALUES(); +SELECT * FROM t; +f0 +NULL +connection node_2; +SELECT * FROM t; +f0 +NULL +DROP TABLE 
t; +connection node_1; +SET GLOBAL wsrep_mode=REPLICATE_ARIA; +CREATE TABLE t (f0 CHAR(0)) ENGINE=Aria; +INSERT INTO t VALUES(); +SELECT * FROM t; +f0 +NULL +connection node_2; +SELECT * FROM t; +f0 +NULL +DROP TABLE t; +connection node_1; +SET GLOBAL wsrep_mode=REPLICATE_ARIA; +SET GLOBAL wsrep_forced_binlog_format=ROW; +CREATE TABLE t (f0 CHAR(0)) ENGINE=Aria; +INSERT INTO t VALUES(); +SELECT * FROM t; +f0 +NULL +connection node_2; +SELECT * FROM t; +f0 +NULL +DROP TABLE t; +connection node_1; +SET GLOBAL wsrep_mode=REPLICATE_MYISAM; +SET GLOBAL wsrep_forced_binlog_format=MIXED; +ERROR HY000: wsrep_forced_binlog_format=[MIXED|STATEMENT] can't be set if wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; +ERROR HY000: wsrep_forced_binlog_format=[MIXED|STATEMENT] can't be set if wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] +SET GLOBAL wsrep_mode=REPLICATE_ARIA; +SET GLOBAL wsrep_forced_binlog_format=MIXED; +ERROR HY000: wsrep_forced_binlog_format=[MIXED|STATEMENT] can't be set if wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; +ERROR HY000: wsrep_forced_binlog_format=[MIXED|STATEMENT] can't be set if wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] +SET GLOBAL wsrep_mode=DEFAULT; +SET GLOBAL wsrep_forced_binlog_format=MIXED; +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; +ERROR HY000: wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] can't be enabled if wsrep_forced_binlog != [NONE|ROW] +SET GLOBAL wsrep_mode = REPLICATE_ARIA; +ERROR HY000: wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] can't be enabled if wsrep_forced_binlog != [NONE|ROW] +SET GLOBAL wsrep_mode=DEFAULT; +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; +ERROR HY000: wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] can't be enabled if wsrep_forced_binlog != [NONE|ROW] +SET GLOBAL wsrep_mode = REPLICATE_ARIA; +ERROR HY000: wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] can't be enabled if wsrep_forced_binlog != [NONE|ROW] +SET GLOBAL wsrep_forced_binlog_format=DEFAULT; +SET GLOBAL wsrep_mode=DEFAULT; +SET GLOBAL wsrep_forced_binlog_format=MIXED; +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; +ERROR HY000: wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] can't be enabled if wsrep_forced_binlog != [NONE|ROW] +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; +ERROR HY000: wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] can't be enabled if wsrep_forced_binlog != [NONE|ROW] +SET GLOBAL wsrep_forced_binlog_format=DEFAULT; +SET GLOBAL wsrep_mode=DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/mdev-30653.result mariadb-10.11.13/mysql-test/suite/galera/r/mdev-30653.result --- mariadb-10.11.11/mysql-test/suite/galera/r/mdev-30653.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/mdev-30653.result 2025-05-19 16:14:24.000000000 +0000 @@ -4,7 +4,7 @@ create table t2 (id serial, val int) engine=aria; insert into t1 values(1, 23); insert into t2 values(2, 42); -call mtr.add_suppression("WSREP: Replication of non-transactional engines is experimental. Storage engine Aria for table 'test'.'t2' is not supported in Galera"); +call mtr.add_suppression("WSREP: Replication of non-transactional engines is experimental\\. 
Storage engine Aria for table 'test'\\.'t2' is not supported in Galera"); begin; update t1 set val=24 where id=1; update t2 set val=41 where id=2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#198.result mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#198.result --- mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#198.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#198.result 2025-05-19 16:14:24.000000000 +0000 @@ -31,3 +31,6 @@ test.t2 repair note The storage engine for the table doesn't support repair DROP TABLE t1; DROP TABLE t2; +connection node_1; +disconnect node_2a; +disconnect node_2b; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#33,debug.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33,debug.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#33,debug.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33,debug.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ --- r/mysql-wsrep#33.result +++ r/mysql-wsrep#33,debug.reject -@@ -698,12 +698,196 @@ +@@ -698,6 +698,190 @@ 1 DROP TABLE t1; COMMIT; @@ -190,12 +190,4 @@ +SET GLOBAL debug_dbug = $debug_orig; connection node_2; connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); + CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#33,release.rdiff mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33,release.rdiff --- mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#33,release.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33,release.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ ---- r/mysql-wsrep#33.result -+++ r/mysql-wsrep#33.reject -@@ -700,10 +700,10 @@ - COMMIT; - connection node_2; - connection node_1; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); - DROP USER sst; - connection node_2; --CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); -+CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute 
this statement' on query"); - CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); - CALL mtr.add_suppression("Can't open and lock time zone table"); - CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#33.result mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33.result --- mariadb-10.11.11/mysql-test/suite/galera/r/mysql-wsrep#33.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/mysql-wsrep#33.result 2025-05-19 16:14:24.000000000 +0000 @@ -700,10 +700,10 @@ COMMIT; connection node_2; connection node_1; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); DROP USER sst; connection node_2; -CALL mtr.add_suppression("Slave SQL: Error 'The MySQL server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); +CALL mtr.add_suppression("Slave SQL: Error 'The MariaDB server is running with the --skip-grant-tables option so it cannot execute this statement' on query"); CALL mtr.add_suppression("InnoDB: Error: Table \"mysql\"\\.\"innodb_index_stats\" not found"); CALL mtr.add_suppression("Can't open and lock time zone table"); CALL mtr.add_suppression("Can't open and lock privilege tables"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/r/wsrep_mode_strict_replication.result mariadb-10.11.13/mysql-test/suite/galera/r/wsrep_mode_strict_replication.result --- mariadb-10.11.11/mysql-test/suite/galera/r/wsrep_mode_strict_replication.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/r/wsrep_mode_strict_replication.result 2025-05-19 16:14:24.000000000 +0000 @@ -32,6 +32,8 @@ Level Code Message Error 4165 Galera replication not supported Warning 1031 WSREP: wsrep_mode = STRICT_REPLICATION enabled. Storage engine MyISAM not supported. +Error 4165 Galera replication not supported +Warning 1031 WSREP: wsrep_mode = STRICT_REPLICATION enabled. Storage engine MyISAM not supported. SHOW CREATE TABLE t2; Table Create Table t2 CREATE TABLE `t2` ( diff -Nru mariadb-10.11.11/mysql-test/suite/galera/suite.pm mariadb-10.11.13/mysql-test/suite/galera/suite.pm --- mariadb-10.11.11/mysql-test/suite/galera/suite.pm 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/suite.pm 2025-05-19 16:14:24.000000000 +0000 @@ -10,61 +10,61 @@ push @::global_suppressions, ( - qr(WSREP: wsrep_sst_receive_address is set to '127.0.0.1), - qr(WSREP: Could not open saved state file for reading: .*), - qr(WSREP: Could not open state file for reading: .*), - qr(WSREP: Gap in state sequence. Need state transfer.), + qr(WSREP: wsrep_sst_receive_address is set to '127\.0\.0\.1), + qr(WSREP: Could not open saved state file for reading: ), + qr(WSREP: Could not open state file for reading: ), + qr(WSREP: Gap in state sequence\. 
Need state transfer\.), qr(WSREP: Failed to prepare for incremental state transfer:), - qr(WSREP:.*down context.*), + qr(WSREP: .*down context.*), qr(WSREP: Failed to send state UUID:), - qr(WSREP: last inactive check more than .* skipping check), - qr(WSREP: Releasing seqno [0-9]* before [0-9]* was assigned.), - qr|WSREP: access file\(.*gvwstate.dat\) failed\(No such file or directory\)|, + qr(WSREP: last inactive check more than .+ skipping check), + qr(WSREP: Releasing seqno [0-9]+ before [0-9]+ was assigned\.), + qr|WSREP: access file\(.*gvwstate.dat\) failed ?\(No such file or directory\)|, qr(WSREP: Quorum: No node with complete state), qr(WSREP: Initial position was provided by configuration or SST, avoiding override), - qr|WSREP: discarding established \(time wait\) .*|, - qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside. Will use that one.), + qr|WSREP: discarding established \(time wait\) |, + qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside\. Will use that one\.), qr(WSREP: evs::proto.*), - qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:.*|, + qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:|, qr(WSREP: no nodes coming from prim view, prim not possible), - qr(WSREP: Member .* requested state transfer from .* but it is impossible to select State Transfer donor: Resource temporarily unavailable), + qr(WSREP: Member .+ ?requested state transfer from .+ but it is impossible to select State Transfer donor: Resource temporarily unavailable), qr(WSREP: user message in state LEAVING), - qr(WSREP: .* sending install message failed: Transport endpoint is not connected), + qr(WSREP: .* sending install message failed: (Transport endpoint|Socket) is not connected), qr(WSREP: .* sending install message failed: Resource temporarily unavailable), - qr(WSREP: Maximum writeset size exceeded by .*), - qr(WSREP: transaction size exceeded.*), - qr(WSREP: RBR event .*), - qr(WSREP: Ignoring error for TO isolated action: .*), - qr(WSREP: transaction size limit .*), - qr(WSREP: rbr write fail, .*), - qr(WSREP: .*Backend not supported: foo.*), - qr(WSREP: .*Failed to initialize backend using .*), - qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at .*), + qr(WSREP: Maximum writeset size exceeded by ), + qr(WSREP: transaction size exceeded), + qr(WSREP: RBR event ), + qr(WSREP: Ignoring error for TO isolated action: ), + qr(WSREP: transaction size limit ), + qr(WSREP: rbr write fail, ), + qr(WSREP: .*Backend not supported: foo), + qr(WSREP: .*Failed to initialize backend using ), + qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at ), qr(WSREP: gcs connect failed: Socket type not supported), qr(WSREP: failed to open gcomm backend connection: 110: failed to reach primary view: 110 .*), - qr(WSREP: .*Failed to open backend connection: -110 .*), - qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at .*), + qr(WSREP: .*Failed to open backend connection: -110 ), + qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at ), qr(WSREP: gcs connect failed: Connection timed out), qr|WSREP: wsrep::connect\(.*\) failed: 7|, - qr(WSREP: SYNC message from member .* in non-primary configuration. Ignored.), + qr(WSREP: SYNC message from member .+ ?in non-primary configuration\. 
Ignored\.), qr(WSREP: Could not find peer:), - qr(WSREP: TO isolation failed for: .*), - qr|WSREP: gcs_caused\(\) returned .*|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(SYNCED\). Message ignored.|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(JOINED\). Message ignored.|, - qr|WSREP: Unsupported protocol downgrade: incremental data collection disabled. Expect abort.|, + qr(WSREP: TO isolation failed for: ), + qr|WSREP: gcs_caused\(\) returned |, + qr|WSREP: Protocol violation\. JOIN message sender .+ ?is not in state transfer \(SYNCED\)\. Message ignored\.|, + qr|WSREP: Protocol violation\. JOIN message sender .+ ?is not in state transfer \(JOINED\)\. Message ignored\.|, + qr|WSREP: Unsupported protocol downgrade: incremental data collection disabled\. Expect abort\.|, qr(WSREP: Action message in non-primary configuration from member [0-9]*), qr(WSREP: Last Applied Action message in non-primary configuration from member [0-9]*), - qr(WSREP: discarding established .*), - qr|WSREP: .*core_handle_uuid_msg.*|, - qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0. WSREP_SYNC_WAIT_BEFORE_READ is on), - qr|WSREP: JOIN message from member .* in non-primary configuration. Ignored.|, - qr|Query apply failed:*|, - qr(WSREP: Ignoring error*), - qr(WSREP: Failed to remove page file .*), - qr(WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to .*), - qr|WSREP: Sending JOIN failed: -107 \(Transport endpoint is not connected\). Will retry in new primary component.|, - qr|WSREP: Send action \{.* STATE_REQUEST} returned -107 \(Transport endpoint is not connected\)|, + qr(WSREP: discarding established ), + qr|WSREP: .*core_handle_uuid_msg|, + qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0\. WSREP_SYNC_WAIT_BEFORE_READ is on), + qr|WSREP: JOIN message from member .+ ?in non-primary configuration\. Ignored\.|, + qr|WSREP: .*Query apply failed:|, + qr(WSREP: Ignoring error), + qr(WSREP: Failed to remove page file ), + qr(WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to ), + qr+WSREP: Sending JOIN failed: -107 \((Transport endpoint|Socket) is not connected\)\. Will retry in new primary component\.+, + qr+WSREP: Send action \{.* STATE_REQUEST\} returned -107 \((Transport endpoint|Socket) is not connected\)+, qr|WSREP: Trying to continue unpaused monitor|, qr|WSREP: Wait for gtid returned error 3 while waiting for prior transactions to commit before setting position|, qr|WSREP: Failed to report last committed|, diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/GAL-401.test mariadb-10.11.13/mysql-test/suite/galera/t/GAL-401.test --- mariadb-10.11.11/mysql-test/suite/galera/t/GAL-401.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/GAL-401.test 2025-05-19 16:14:24.000000000 +0000 @@ -48,7 +48,7 @@ SET SESSION wsrep_sync_wait=15; SHOW CREATE TABLE t1; DROP TABLE t1; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender (.*) is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender .+ ?is not in state transfer \\(SYNCED\\)\\. 
Message ignored\\."); --connection node_1 --let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/GCF-939.test mariadb-10.11.13/mysql-test/suite/galera/t/GCF-939.test --- mariadb-10.11.11/mysql-test/suite/galera/t/GCF-939.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/GCF-939.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,7 @@ # --source include/galera_cluster.inc +--source include/have_innodb.inc --exec rm -rf $MYSQLTEST_VARDIR/mysqld.2/data/GRA_*.log @@ -30,5 +31,6 @@ DROP TABLE t1; CALL mtr.add_suppression("Ignoring error 'Unknown table 'test\\.t1'' on query"); + --connection node_2 CALL mtr.add_suppression("Error 'Unknown table 'test\\.t1'' on query"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-10715.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-10715.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-10715.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-10715.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,11 +4,13 @@ log-bin=mysqld-bin log-slave-updates binlog-format=ROW + [mysqld.1] gtid-domain-id=1 wsrep_gtid_mode=1 wsrep_gtid_domain_id=1 + [mysqld.2] gtid-domain-id=1 wsrep_gtid_mode=1 -wsrep_gtid_domain_id=1 \ No newline at end of file +wsrep_gtid_domain_id=1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-15443.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-15443.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-15443.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-15443.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,7 @@ !include ../galera_2nodes.cnf + [mysqld.1] wsrep_auto_increment_control=OFF + [mysqld.2] wsrep_auto_increment_control=OFF diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-18832.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-18832.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-18832.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-18832.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,6 @@ --source include/galera_cluster.inc --source include/have_innodb.inc +--source include/have_sequence.inc CREATE SEQUENCE Seq1_1 START WITH 1 INCREMENT BY 1 NOCACHE; CREATE TABLE t1 (Id int(11) NOT NULL, PRIMARY KEY (Id)); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-20225.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-20225.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-20225.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-20225.test 2025-05-19 16:14:24.000000000 +0000 @@ -41,7 +41,7 @@ SET GLOBAL debug_dbug = 'RESET'; SET DEBUG_SYNC = 'now SIGNAL signal.mdev_20225_continue'; SET DEBUG_SYNC = 'RESET'; -SET GLOBAL wsrep_slave_threads = 1; +SET GLOBAL wsrep_slave_threads = DEFAULT; --connection node_2 # Trigger should now be dropped on node_2. 
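The suppression-pattern edits above all tighten one idiom. mtr matches these Perl regexes unanchored against the error log, so the trailing .* in the old patterns matched nothing useful and is dropped; literal dots are now escaped as \. so they stop matching arbitrary characters; the capture group (.*) becomes the non-empty .+ ?; and messages that embed strerror() output gain the (Transport endpoint|Socket) is not connected alternation, since glibc and the BSD-family libcs spell ENOTCONN differently. The first two points can be probed with the server's own REGEXP operator (an illustration, not part of the patch; the sample log lines are invented):

    # Unanchored match: a trailing .* adds nothing to the old patterns.
    SELECT 'WSREP: transaction size exceeded, maximum is 2G'
           REGEXP 'WSREP: transaction size exceeded' AS still_matches;
    # Escaped dot plus [0-9]+ now require digits and a literal '.'.
    SELECT 'WSREP: Releasing seqno 15 before 17 was assigned.'
           REGEXP 'WSREP: Releasing seqno [0-9]+ before [0-9]+ was assigned\\.' AS strict_match;

Both probes return 1 against the matching sample lines.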
diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-20793.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-20793.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-20793.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-20793.test 2025-05-19 16:14:24.000000000 +0000 @@ -99,4 +99,4 @@ SET debug_sync = "RESET"; DROP TABLE t1; -SET GLOBAL wsrep_slave_threads = 1; +SET GLOBAL wsrep_slave_threads = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-21479.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-21479.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-21479.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-21479.test 2025-05-19 16:14:24.000000000 +0000 @@ -77,7 +77,7 @@ --let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; --source include/wait_condition.inc -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender (.*) is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender .+ ?is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); --connection node_1 --echo # Wait until both nodes are back to cluster diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-22227.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-22227.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-22227.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-22227.test 2025-05-19 16:14:24.000000000 +0000 @@ -13,7 +13,7 @@ --connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 --connection node_1b SET SESSION wsrep_sync_wait = 0; ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE = 'Waiting for table level lock' +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 'Waiting for table level lock' --source include/wait_condition.inc --connection node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-22708.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-22708.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-22708.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-22708.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,4 @@ !include ../galera_2nodes.cnf [mysqld] -log-bin \ No newline at end of file +log-bin diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-24143.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-24143.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-24143.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-24143.test 2025-05-19 16:14:24.000000000 +0000 @@ -21,4 +21,3 @@ ALTER TABLE t1 DROP COLUMN c2; SELECT get_lock ('test', 1.5); DROP TABLE t1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-24327.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-24327.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-24327.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-24327.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -3,4 +3,3 @@ [mysqld.1] log-bin=mariadb-bin log-slave-updates=OFF - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-25389.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-25389.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-25389.test 
2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-25389.test 2025-05-19 16:14:24.000000000 +0000 @@ -7,6 +7,8 @@ --source ../galera/include/auto_increment_offset_save.inc --connection node_2 +--let $wsrep_slave_threads_orig = `SELECT @@wsrep_slave_threads` + call mtr.add_suppression("WSREP: Failed to create/initialize system thread"); SET GLOBAL debug_dbug='+d,wsrep_simulate_failed_connection_1'; --error ER_WRONG_ARGUMENTS @@ -21,4 +23,9 @@ # issue is fixed. --source include/restart_mysqld.inc +--connection node_2 +--disable_query_log +--eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig; +--enable_query_log + --source ../galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-26266.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-26266.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-26266.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-26266.test 2025-05-19 16:14:24.000000000 +0000 @@ -31,7 +31,6 @@ INSERT INTO t2 VALUES (3); INSERT INTO t2 VALUES (4); INSERT INTO t2 VALUES (5); ---error ER_LOCK_DEADLOCK CREATE VIEW v1 AS SELECT c1 FROM t1 WHERE c1 IN (SELECT a FROM t2) GROUP BY c1; - +DROP VIEW v1; DROP TABLE t1,t2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-26597.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-26597.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-26597.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-26597.test 2025-05-19 16:14:24.000000000 +0000 @@ -28,5 +28,3 @@ --source ../../galera/include/auto_increment_offset_restore.inc --connection node_1 DROP TABLE t3; - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27001.opt mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27001.opt --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27001.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27001.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1 +1 @@ ---partition=ON \ No newline at end of file +--partition=ON diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27001.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27001.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27001.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27001.test 2025-05-19 16:14:24.000000000 +0000 @@ -4,4 +4,4 @@ CREATE TABLE t3 (c INT) PARTITION BY RANGE (c) (PARTITION p1 VALUES LESS THAN (1000)); CREATE TABLE tp2 (c INT); ALTER TABLE t3 CONVERT TABLE tp2 TO PARTITION p2 VALUES LESS THAN (2000); -DROP TABLE t3; \ No newline at end of file +DROP TABLE t3; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27123.opt mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27123.opt --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27123.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27123.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,2 +1 @@ --wsrep_auto_increment_control=OFF --auto_increment_increment=3 --auto_increment_offset=3 - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27862.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27862.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-27862.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-27862.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,6 @@ --source include/galera_cluster.inc --source 
include/have_innodb.inc +--source include/have_sequence.inc --disable_ps2_protocol diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-28053.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-28053.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-28053.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-28053.test 2025-05-19 16:14:24.000000000 +0000 @@ -39,6 +39,7 @@ --disable_result_log --eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_3; START SLAVE; + --eval SELECT MASTER_GTID_WAIT('$gtid', 600) --enable_result_log --enable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-29293.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-29293.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-29293.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-29293.test 2025-05-19 16:14:24.000000000 +0000 @@ -38,4 +38,3 @@ --reap DROP TABLE t1; SET DEBUG_SYNC= 'RESET'; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-29512.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-29512.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-29512.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-29512.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -10,6 +10,4 @@ max-binlog-size=4096 expire-logs-days=1 - [mysqld.2] - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-32549.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-32549.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-32549.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-32549.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,7 @@ # statement is rolled back # --source include/galera_cluster.inc +--source include/have_aria.inc CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) engine=innodb; CREATE TABLE t2 (f1 INTEGER PRIMARY KEY) engine=aria; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-33136.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33136.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-33136.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33136.test 2025-05-19 16:14:24.000000000 +0000 @@ -10,6 +10,7 @@ # transaction in the MDL conflict handling code. --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_debug_sync.inc --source include/have_debug.inc @@ -19,8 +20,8 @@ CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; --connection node_1a -TRUNCATE TABLE t1; -# TRUNCATE forces the next statement to re-read statistics from persistent storage, +RENAME TABLE t1 TO tmp, tmp TO t1; +# RENAME forces the next statement to re-read statistics from persistent storage, # which will acquire MDL locks on the statistics tables in InnoDB. 
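The hunk above swaps TRUNCATE for a there-and-back RENAME: both force the next statement on the table to re-read persistent statistics (and thus take MDL on the InnoDB statistics tables), but the RENAME does it without discarding the rows. A minimal sketch of the idiom on a scratch table (illustrative only, not the test's own code):

    CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
    # A self-cancelling rename: the data is untouched, but the cached
    # persistent statistics entry for t1 is invalidated.
    RENAME TABLE t1 TO tmp, tmp TO t1;
    # The next statement re-reads the statistics, acquiring MDL on
    # mysql.innodb_table_stats / mysql.innodb_index_stats.
    SELECT COUNT(*) FROM t1;
    DROP TABLE t1;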
SET SESSION wsrep_retry_autocommit = 0; SET DEBUG_SYNC = 'dict_stats_mdl_acquired SIGNAL may_toi WAIT_FOR bf_abort'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-33828.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33828.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-33828.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33828.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -2,3 +2,12 @@ [mysqld] log-bin +log-slave-updates + +[mysqld.1] +auto-increment-increment=2 +auto-increment-offset=1 + +[mysqld.2] +auto-increment-increment=2 +auto-increment-offset=2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-33828.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33828.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-33828.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-33828.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,5 @@ --source include/galera_cluster.inc --source include/have_innodb.inc ---source include/have_aria.inc SET AUTOCOMMIT=ON; SELECT @@autocommit; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-34647.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-34647.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-34647.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-34647.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,13 @@ +!include ../galera_2nodes.cnf + +[mysqld] +log-bin +log-slave-updates + +[mysqld.1] +auto-increment-increment=2 +auto-increment-offset=1 + +[mysqld.2] +auto-increment-increment=2 +auto-increment-offset=2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-34647.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-34647.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-34647.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-34647.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_aria.inc create table t1(id serial, val varchar(100)) engine=myisam; @@ -38,14 +39,12 @@ insert into t5 select null, 'd' from t5; select * from t2; - --connection node_2 select * from t1; select * from t2; select * from t3; select * from t4; select * from t5; -set global wsrep_mode=default; --connection node_1 drop table t1,t2,t3,t4,t5; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-35748.opt mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35748.opt --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-35748.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35748.opt 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1 @@ +--plugin-load=$HA_ROCKSDB_SO diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-35748.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35748.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-35748.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35748.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,22 @@ +--source include/galera_cluster.inc +--source include/have_sequence.inc +--source include/have_rocksdb.inc + +--connection node_1 +INSTALL PLUGIN IF NOT EXISTS connect SONAME 'ha_connect'; + +CREATE TABLE t1 (f INT) ENGINE=CONNECT; +CREATE TABLE t2 (f INT) ENGINE=ROCKSDB; +--error ER_NOT_SUPPORTED_YET +CREATE TABLE t3 (f INT) ENGINE=SEQUENCE; +show warnings; + +--connection node_2 +show create table t1; +show 
create table t2; +--error ER_NO_SUCH_TABLE +show create table t3; + +--connection node_1 +DROP TABLE t1, t2; +UNINSTALL PLUGIN IF EXISTS connect; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-35946.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35946.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-35946.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-35946.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,39 @@ +# +# MDEV-35946: Assertion `thd->is_error()' failed in Sql_cmd_dml::prepare +# +--source include/have_innodb.inc +--source include/galera_cluster.inc + +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + +# +# Disconnect from the cluster +# +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=1'; +SET SESSION wsrep_sync_wait=0; +--let $wait_condition = SELECT VARIABLE_VALUE = 'non-Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc +SET SESSION wsrep_sync_wait=DEFAULT; + +# +# If bug is present, assertion will fire +# during the execution of the following DELETE +# +--error ER_LOCK_WAIT_TIMEOUT +DELETE FROM mysql.wsrep_streaming_log; + +# +# Reconnect to the cluster +# +SET SESSION wsrep_sync_wait=0; +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=0'; +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc +SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +SET SESSION wsrep_sync_wait=DEFAULT; + +--source include/auto_increment_offset_restore.inc +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender .+ ?is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-36116.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-36116.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-36116.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-36116.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,43 @@ +# +# MDEV-36116: TOI crashes in debug assert if executing thread is killed. +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_debug.inc + +--connect con1,127.0.0.1,root,,test,$NODE_MYPORT_1 + +# Start TOI operation and wait for the thread to be killed. +--connection node_1 +CALL mtr.add_suppression("CREATE TABLE isolation failure"); + +--let $connection_id = `SELECT CONNECTION_ID()` +SET DEBUG_SYNC = 'wsrep_kill_thd_before_enter_toi SIGNAL may_kill WAIT_FOR continue'; +--send + CREATE TABLE t1 (a INT) ENGINE=InnoDB; + +# Kill the thread and let it continue. +--connection con1 +SET DEBUG_SYNC = 'now WAIT_FOR may_kill'; +--disable_query_log +--eval KILL CONNECTION $connection_id +--enable_query_log +SET DEBUG_SYNC = 'now SIGNAL continue'; + +--connection node_1 +--error 2013,2026 +--reap + +# Verify no tables created on either node.
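A client killed inside TOI can surface either error 2013 (lost connection to server) or 2026 (the TLS-layer variant of the same failure), which is why the reap above accepts both; and since it is unspecified whether the killed CREATE replicated before the kill, the absence of t1 has to be checked on both nodes. The same check could be written as an information_schema probe (a sketch, not the test's own code):

    SELECT COUNT(*) = 0 AS t1_absent
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA = 'test' AND TABLE_NAME = 't1';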
+--connection node_2 +SHOW TABLES LIKE 't1'; + +--connection con1 +SHOW TABLES LIKE 't1'; + +# Cleanup +SET DEBUG_SYNC = 'RESET'; +--disconnect con1 +--source include/galera_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-6860.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-6860.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-6860.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-6860.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,8 @@ !include ../galera_2nodes_as_slave.cnf +[mysqld.1] +wsrep-slave-threads=10 + [mysqld.2] slave-parallel-threads=2 slave-parallel-mode=optimistic -[mysqld.1] -wsrep-slave-threads=10 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-6860.test mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-6860.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MDEV-6860.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MDEV-6860.test 2025-05-19 16:14:24.000000000 +0000 @@ -4,7 +4,7 @@ --connection node_2 --disable_query_log ---eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_3, MASTER_USE_GTID=slave_pos; +--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_3, master_use_gtid=slave_pos; --enable_query_log START SLAVE; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-259.test mariadb-10.11.13/mysql-test/suite/galera/t/MW-259.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-259.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-259.test 2025-05-19 16:14:24.000000000 +0000 @@ -39,4 +39,3 @@ # Cleanup SET DEBUG_SYNC= 'RESET'; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-284.test mariadb-10.11.13/mysql-test/suite/galera/t/MW-284.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-284.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-284.test 2025-05-19 16:14:24.000000000 +0000 @@ -2,15 +2,16 @@ # MW-284 Slave I/O retry on ER_COM_UNKNOWN_ERROR # ---source include/have_log_bin.inc --source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_log_bin.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 call mtr.add_suppression("\\[ERROR\\] Error reading packet from server: WSREP has not yet prepared node for application use "); call mtr.add_suppression("WSREP has not yet prepared node for application use"); --disable_query_log ---eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=$NODE_MYPORT_1, MASTER_USER='root', MASTER_CONNECT_RETRY=1; +--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_1, master_connect_retry=1; --enable_query_log --connection node_1 @@ -29,7 +30,7 @@ --connection node_3 SELECT @@wsrep_on; --sleep 1 -call mtr.add_suppression("Error reading packet from server: WSREP has not yet prepared node for application use (server_errno=1047)"); +call mtr.add_suppression("Error reading packet from server: WSREP has not yet prepared node for application use \\(server_errno ?= ?1047\\)"); START SLAVE; --let $slave_param= Slave_IO_Running --let $slave_param_value= Connecting diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-313.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MW-313.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-313.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-313.cnf 2025-05-19 16:14:24.000000000 +0000 @@ 
-7,6 +7,3 @@ [mysqld.2] log-bin log-slave-updates - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-329.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MW-329.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-329.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-329.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,6 +4,3 @@ wsrep-retry-autocommit=0 [mysqld.2] - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-329.test mariadb-10.11.13/mysql-test/suite/galera/t/MW-329.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-329.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-329.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ # -# #MW-329 Fix incorrect affected rows count after replay +# MW-329 Fix incorrect affected rows count after replay. # --source include/galera_cluster.inc @@ -11,7 +11,7 @@ INSERT INTO t1 (f1) VALUES (1),(65535); # -# Run concurrent INSERTs +# Run concurrent INSERTs # DELIMITER |; @@ -86,6 +86,10 @@ --eval KILL CONNECTION $connection_id --enable_query_log +# +# getting execution results for --send +# + --connection node_1b --error 0,1317,2013,2026 --reap @@ -96,6 +100,8 @@ DROP PROCEDURE proc_insert; DROP TABLE t1; +--disconnect node_1b + # Due to MW-330, Multiple "conflict state 3 after post commit" warnings if table is dropped while SP is running CALL mtr.add_suppression("WSREP: .* conflict state after post commit "); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-329F.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MW-329F.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-329F.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-329F.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,6 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +wsrep-retry-autocommit=0 + +[mysqld.2] diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-329F.test mariadb-10.11.13/mysql-test/suite/galera/t/MW-329F.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-329F.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-329F.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,105 @@ +# +# MW-329F Fix incorrect affected rows count after replay. +# +# This is a version of MW-329 without the infinite loop that +# in the original test is closed by killing the connection. +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc + +CREATE TABLE t1 (f1 INTEGER, f2 CHAR(20) DEFAULT 'abc') ENGINE=InnoDB; + +# We start with a populated table +INSERT INTO t1 (f1) VALUES (1),(65535); + +# +# Run concurrent INSERTs +# + +DELIMITER |; +CREATE PROCEDURE proc_insert (repeat_count int) +BEGIN + DECLARE current_num int; + DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; + SET current_num = 0; + SET SESSION wsrep_sync_wait = 0; + WHILE current_num < repeat_count do + INSERT INTO t1 (f1) VALUES (FLOOR( 1 + RAND( ) * 65535 )); + SELECT SLEEP(0.1); + SET current_num = current_num + 1; + END WHILE; +END| +DELIMITER ;| + +--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1b +--let $connection_id = `SELECT CONNECTION_ID()` +--disable_query_log +--disable_result_log +--send CALL proc_insert(500); + +# +# Run concurrent UPDATEs. 
We expect that each UPDATE will report that +# some rows were matched and updated +# + +--connection node_2 +--let $count = 2 +--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` + +while ($count) +{ + --let $signature = `SELECT LEFT(MD5(RAND()), 10)` + --disable_query_log + --error 0,ER_LOCK_DEADLOCK + --eval UPDATE t1 SET f2 = '$signature' + --enable_query_log + --let $row_count = `SELECT ROW_COUNT()` + if (`SELECT @@error_count = 0`) { + if (`SELECT $row_count = 0`) { + --die ROW_COUNT() = 0 + } + } + + # + # Ensure at least one replay happens + # + + --let $wsrep_replays = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` + --disable_query_log + if (`SELECT $wsrep_replays - $wsrep_local_replays_old > 0`) { + --dec $count + } + --enable_query_log +} + +# +# Confirm that some transaction replays occurred +# + +--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` +--disable_query_log +--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old > 0 AS wsrep_local_replays; +--enable_query_log + +# +# getting execution results for --send +# + +--connection node_1b +--error 0,1317,2013,2026 +--reap +--enable_query_log +--enable_result_log + +--connection node_1 +DROP PROCEDURE proc_insert; +DROP TABLE t1; + +--disconnect node_1b + +# Due to MW-330, Multiple "conflict state 3 after post commit" warnings if table is dropped while SP is running +CALL mtr.add_suppression("WSREP: .* conflict state after post commit "); + +set global innodb_status_output=Default; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-360-master.opt mariadb-10.11.13/mysql-test/suite/galera/t/MW-360-master.opt --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-360-master.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-360-master.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,2 +1 @@ --gtid-domain-id=1 --log-bin --log-slave-updates - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-369.inc mariadb-10.11.13/mysql-test/suite/galera/t/MW-369.inc --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-369.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-369.inc 2025-05-19 16:14:24.000000000 +0000 @@ -80,5 +80,3 @@ SET GLOBAL DEBUG_DBUG = ""; SET DEBUG_SYNC = 'RESET'; - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-416.test mariadb-10.11.13/mysql-test/suite/galera/t/MW-416.test --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-416.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-416.test 2025-05-19 16:14:24.000000000 +0000 @@ -21,73 +21,71 @@ #ALTER INSTANCE ROTATE INNODB MASTER KEY; --error 1044,1227,1370 ALTER PROCEDURE proc1 COMMENT 'foo'; ---error 1044,1227,1370 +--error 1044,1227 ALTER SERVER srv OPTIONS (USER 'sally'); ---error 1044,1142,1227,1370 +--error 1044,1142,1227 ALTER TABLE tbl DROP COLUMN col; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 ALTER VIEW vw AS SELECT 1; ---error 1044,1227,1370 +--error 1044,1227 CREATE DATABASE db; ---error 1044,1227,1370 -CREATE EVENT ev1 ON SCHEDULE AT CURRENT_TIMESTAMP DO SELECT 1; +--error 1044,1227 +CREATE EVENT ev1 ON SCHEDULE AT CURRENT_TIMESTAMP DO SELECT 1; --error 1044,1227,1370 CREATE FUNCTION fun1() RETURNS int RETURN(1); --error 1044,1227,1370 CREATE FUNCTION fun1 RETURNS STRING SONAME 
'funlib.so'; --error 1044,1227,1370 -CREATE PROCEDURE proc1() BEGIN END; ---error 1044,1142,1227,1370 +CREATE PROCEDURE proc1() BEGIN END; +--error 1044,1142,1227 CREATE INDEX idx ON tbl(id); ---error 1044,1142,1227,1370 +--error 1044,1227 CREATE SERVER srv FOREIGN DATA WRAPPER 'fdw' OPTIONS (USER 'user'); ---error 1044,1142,1227,1370 +--error 1044,1142,1227 CREATE TABLE t (i int); ---error 1044,1142,1227,1370 +--error 1044,1142,1227 CREATE TRIGGER trg BEFORE UPDATE ON t FOR EACH ROW BEGIN END; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 CREATE VIEW vw AS SELECT 1; - - ---error 1044,1142,1227,1370 +--error 1044,1227 DROP DATABASE db; ---error 1044,1142,1227,1370 +--error 1044,1227 DROP EVENT ev; ---error 1044,1142,1227,1370 +--error 1044,1227,1370 DROP FUNCTION fun1; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 DROP INDEX idx ON t0; ---error 1044,1142,1227,1370 +--error 1044,1227,1370 DROP PROCEDURE proc1; ---error 1044,1142,1227,1370 +--error 1044,1227 DROP SERVEr srv; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 DROP TABLE t0; ---error 1044,1142,1227,1360,1370 +--error 1044,1227,1360 DROP TRIGGER trg; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 DROP VIEW vw; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 RENAME TABLE t0 TO t1; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 TRUNCATE TABLE t0; # DCL # account management ---error 1044,1142,1227,1370,1064 +--error 1044,1227,1064 ALTER USER myuser PASSWORD EXPIRE; ---error 1044,1142,1227,1370 +--error 1044,1227 CREATE USER myuser IDENTIFIED BY 'pass'; ---error 1044,1142,1227,1370 +--error 1044,1227 DROP USER myuser; ---error 1044,1045,1142,1227,1370 +--error 1044,1045,1227 GRANT ALL ON *.* TO 'myuser'; ---error 1044,1142,1227,1370 +--error 1044,1227 RENAME USER myuser TO mariauser; --error 1044,1142,1227,1370 REVOKE SELECT ON test FROM myuser; @@ -97,24 +95,25 @@ REVOKE PROXY ON myuser FROM myuser; # table maintenance ---error 1044,1142,1227,1370 +--error 1044,1142,1227 ANALYZE TABLE db.tbl; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 CHECK TABLE db.tbl; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 CHECKSUM TABLE db.tbl; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 OPTIMIZE TABLE db.tbl; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 REPAIR TABLE db.tbl; # plugin and user defined functions ---error 1044,1142,1227,1370 +--error 1044,1142,1227 INSTALL PLUGIN plg SONAME 'plg.so'; ---error 1044,1142,1227,1370 +--error 1044,1142,1227 UNINSTALL PLUGIN plg; --connection node_1 DROP USER 'userMW416'@'localhost'; SHOW DATABASES; +--disconnect userMW416 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/MW-86-wait8.cnf mariadb-10.11.13/mysql-test/suite/galera/t/MW-86-wait8.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/MW-86-wait8.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/MW-86-wait8.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,4 +7,3 @@ [mysqld.2] log-bin log-slave-updates - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/binlog_checksum.test mariadb-10.11.13/mysql-test/suite/galera/t/binlog_checksum.test --- mariadb-10.11.11/mysql-test/suite/galera/t/binlog_checksum.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/binlog_checksum.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc --echo # On node_1 --connection node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/create.test 
mariadb-10.11.13/mysql-test/suite/galera/t/create.test --- mariadb-10.11.11/mysql-test/suite/galera/t/create.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/create.test 2025-05-19 16:14:24.000000000 +0000 @@ -86,4 +86,3 @@ --source include/galera_end.inc --echo # End of tests - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera#414.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera#414.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera#414.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera#414.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcs.max_packet_size=2' +wsrep_provider_options='gcs.max_packet_size=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcs.max_packet_size=2' +wsrep_provider_options='gcs.max_packet_size=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera#500.test mariadb-10.11.13/mysql-test/suite/galera/t/galera#500.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera#500.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera#500.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,7 +3,12 @@ # thrown from gcomm background thread, the provider terminates properly # and wsrep_ready becomes 0. # +# Not to be run with ASAN. Provider leaks memory when gcomm +# thread is aborted forcefully and ASAN crashes during leak report
+# +--source include/not_asan.inc --source include/have_innodb.inc --source include/galera_cluster.inc --source include/galera_have_debug_sync.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_2primary_replica.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_2primary_replica.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_2primary_replica.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_2primary_replica.test 2025-05-19 16:14:24.000000000 +0000 @@ -41,17 +41,19 @@ --let $node_1 = replica --let $node_2 = node_2 +--let $node_3 = primary1 +--let $node_4 = primary2 --source include/auto_increment_offset_save.inc --connection replica --echo # Galera replica changing master to primary1 ---disable_query_log SET @@default_master_connection='stream1'; +--disable_query_log --eval CHANGE MASTER 'stream1' TO master_host='127.0.0.1', master_user='repl', master_password='repl', master_port=$NODE_MYPORT_3, master_use_gtid=slave_pos; --enable_query_log -SET @@default_master_connection='stream2'; --echo # Primary node changing master to primary2 +SET @@default_master_connection='stream2'; --disable_query_log --eval CHANGE MASTER 'stream2' TO master_host='127.0.0.1', master_user='repl2', master_password='repl2', master_port=$NODE_MYPORT_4, master_use_gtid=slave_pos; --enable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_MDEV-29512.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_MDEV-29512.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_MDEV-29512.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_MDEV-29512.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -10,6 +10,4 @@ max-binlog-size=4096 expire-logs-days=1 - [mysqld.2] - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_alter_engine_myisam.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_alter_engine_myisam.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_alter_engine_myisam.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_alter_engine_myisam.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_aria.inc # @@ -35,7 +36,4 @@ DROP TABLE t1; --connection node_1 ---disable_query_log SET GLOBAL wsrep_mode = DEFAULT; ---enable_query_log - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_applier_ftwrl_table.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_applier_ftwrl_table.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -20,7 +20,7 @@ --connection node_1a SET SESSION wsrep_sync_wait = 0; ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock' +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); --source include/wait_condition.inc SELECT COUNT(*) = 0 FROM t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.cnf --- 
mariadb-10.11.11/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -9,6 +9,3 @@ lock_wait_timeout=5 innodb_lock_wait_timeout=5 wait_timeout=5 - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_applier_ftwrl_table_alter.test 2025-05-19 16:14:24.000000000 +0000 @@ -27,16 +27,16 @@ --connection node_1 SELECT 1 FROM DUAL; # Wait ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock'; +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); --source include/wait_condition.inc -SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock'; +SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); UNLOCK TABLES; SET SESSION wsrep_sync_wait = 15; SHOW CREATE TABLE t1; -SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock'; +SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_as_slave_ctas.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_as_slave_ctas.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_as_slave_ctas.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_as_slave_ctas.test 2025-05-19 16:14:24.000000000 +0000 @@ -73,4 +73,3 @@ --connection node_3 RESET MASTER; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_as_slave_nonprim.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_as_slave_nonprim.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_as_slave_nonprim.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_as_slave_nonprim.test 2025-05-19 16:14:24.000000000 +0000 @@ -2,7 +2,7 @@ # Test the behavior of a Galera async slave if it goes non-prim. Async replication # should abort with an error but it should be possible to restart it. 
# -# The galera/galera_2node_slave.cnf describes the setup of the nodes +# The galera_3nodes_as_slave.cnf describes the setup of the nodes # --source include/have_innodb.inc @@ -17,9 +17,10 @@ --connection node_2 --disable_query_log ---eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=$NODE_MYPORT_4, MASTER_USER='root'; +--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_4; --enable_query_log START SLAVE; + SET SESSION wsrep_sync_wait = 0; --connection node_4 @@ -44,9 +45,8 @@ INSERT INTO t1 VALUES (1),(2),(3),(4),(5); --connection node_2 ---sleep 5 +wait_for_slave_to_stop; --let $value = query_get_value(SHOW SLAVE STATUS, Last_SQL_Error, 1) ---connection node_1 --disable_query_log --eval SELECT "$value" IN ("Error 'Unknown command' on query. Default database: 'test'. Query: 'BEGIN'", "Node has dropped from cluster") AS expected_error --enable_query_log @@ -74,7 +74,6 @@ --connection node_4 DROP TABLE t1; ---sleep 2 --connection node_2 --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc @@ -84,7 +83,7 @@ CALL mtr.add_suppression("Slave SQL: Error 'Unknown command' on query"); CALL mtr.add_suppression("Slave: Unknown command Error_code: 1047"); -CALL mtr.add_suppression("Transport endpoint is not connected"); +CALL mtr.add_suppression("(Transport endpoint|Socket) is not connected"); CALL mtr.add_suppression("Slave SQL: Error in Xid_log_event: Commit could not be completed, 'Deadlock found when trying to get lock; try restarting transaction', Error_code: 1213"); CALL mtr.add_suppression("Slave SQL: Node has dropped from cluster, Error_code: 1047"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_autoinc_sst_mariabackup.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_autoinc_sst_mariabackup.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_autoinc_sst_mariabackup.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_autoinc_sst_mariabackup.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,7 +5,7 @@ wsrep_sst_auth="root:" [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=10M;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=10M;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_backup_stage.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_backup_stage.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_backup_stage.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_backup_stage.test 2025-05-19 16:14:24.000000000 +0000 @@ -56,7 +56,7 @@ # reach commit stage. In the unlikely case the interleaving is different, the # result of the test should not change. 
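The galera_backup_stage.test hunks below continue a pattern running through this update: wait conditions that compared PROCESSLIST.STATE by strict equality are widened to LIKE alternations, so threads reporting a variant state string, such as 'Waiting to execute in isolation' for TOI appliers or a '...committing...' commit-phase state, are still counted. As a standalone query, the broadened MDL-wait probe has this shape (illustrative):

    SELECT COUNT(*) FROM INFORMATION_SCHEMA.PROCESSLIST
    WHERE USER = 'system user'
      AND (STATE LIKE 'Waiting for table metadata lock%'
           OR STATE LIKE 'Waiting to execute in isolation%');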
--connection node_1c ---let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE (State='Commit' OR State='Waiting for certification') AND ID=$insert_id +--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE (STATE LIKE '%committing%' OR STATE LIKE 'Commit' OR STATE LIKE 'Waiting for certification') AND ID=$insert_id --source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.metadata_lock_info WHERE TABLE_NAME='t1' AND THREAD_ID=$insert_id --source include/wait_condition.inc @@ -83,11 +83,11 @@ # wait for insert to get blocked --connection node_1c ---let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE (State='Commit' OR State='Waiting for certification') AND ID=$insert_id +--let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.processlist WHERE (STATE LIKE '%committing%' OR STATE LIKE 'Commit' OR STATE LIKE 'Waiting for certification') AND ID=$insert_id --source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*)=1 FROM information_schema.metadata_lock_info WHERE TABLE_NAME='t1' AND THREAD_ID=$insert_id --source include/wait_condition.inc ---let $wait_condition = SELECT COUNT(*)=2 FROM information_schema.processlist WHERE Info like 'INSERT INTO t1 (f1) values("node1%")' AND (State = 'Commit' OR State='Waiting for certification') +--let $wait_condition = SELECT COUNT(*)=2 FROM information_schema.processlist WHERE Info like 'INSERT INTO t1 (f1) values("node1%")' AND (STATE LIKE '%committing%' OR STATE LIKE 'Commit' OR STATE LIKE 'Waiting for certification') --source include/wait_condition.inc # nothing after BLOCK_DDL is applied diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_at_after_statement.test 2025-05-19 16:14:24.000000000 +0000 @@ -55,4 +55,3 @@ --disconnect node_2a --disconnect node_2b - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_flush_for_export.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_flush_for_export.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_flush_for_export.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_flush_for_export.test 2025-05-19 16:14:24.000000000 +0000 @@ -17,12 +17,12 @@ --connection node_2 SET SESSION wsrep_sync_wait = 0; ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock' +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); --source include/wait_condition.inc UNLOCK TABLES; ---let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock' +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); --source include/wait_condition.inc COMMIT; diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_lock_table.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_lock_table.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_lock_table.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_lock_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -16,13 +16,16 @@ INSERT INTO t1 VALUES (2); --connection node_2 ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock' ---source include/wait_condition.inc +SET SESSION wsrep_sync_wait = 0; +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); +--let $wait_condition_on_error_output = SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST +--source include/wait_condition_with_debug.inc UNLOCK TABLES; ---let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE = 'Waiting for table metadata lock' ---source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); +--let $wait_condition_on_error_output = SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST +--source include/wait_condition_with_debug.inc COMMIT; SELECT COUNT(*) = 1 FROM t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,10 +6,10 @@ wsrep_debug=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_mariabackup.test 2025-05-19 16:14:24.000000000 +0000 @@ -129,7 +129,7 @@ let SEARCH_PATTERN = Server desynched from group during BACKUP STAGE BLOCK_COMMIT.; --source include/search_pattern_in_file.inc -SET GLOBAL wsrep_mode = ""; +SET GLOBAL wsrep_mode = DEFAULT; --connection node_1 DROP TABLE t; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf --- 
mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_abort_ps.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,3 +1,4 @@ !include ../galera_2nodes.cnf + [mysqltest] -ps-protocol \ No newline at end of file +ps-protocol diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_background_statistics.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_background_statistics.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_background_statistics.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_background_statistics.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,5 +5,3 @@ [mysqld.2] innodb_stats_persistent=ON - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_background_statistics.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_background_statistics.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_background_statistics.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_background_statistics.test 2025-05-19 16:14:24.000000000 +0000 @@ -46,4 +46,3 @@ --enable_query_log DROP TABLE t1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_kill.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_kill.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_kill.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_kill.test 2025-05-19 16:14:24.000000000 +0000 @@ -113,7 +113,7 @@ --connection node_2b SET SESSION wsrep_sync_wait=0; ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE = 'Waiting for table metadata lock'; +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'; --source include/wait_condition.inc --connection node_2a diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_kill_debug.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_kill_debug.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_kill_debug.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_kill_debug.test 2025-05-19 16:14:24.000000000 +0000 @@ -110,7 +110,7 @@ --connection node_2a --let $connection_id = `SELECT CONNECTION_ID()` -CREATE TABLE t1 (i int primary key); +CREATE TABLE t1 (i int primary key) engine=innodb; # Set up sync point SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue"; @@ -129,17 +129,17 @@ --enable_query_log SET DEBUG_SYNC = "now SIGNAL bwoc_continue"; -SET DEBUG_SYNC='RESET'; --connection node_2a --error 0,1213,2013,2026 --reap --connection node_2 +SET DEBUG_SYNC='RESET'; # victim was able to complete the INSERT select * from t1; --disconnect node_2a +--disconnect node_2b --connection node_1 drop table t1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_lock_wait.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_lock_wait.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_bf_lock_wait.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_bf_lock_wait.test 2025-05-19 16:14:24.000000000 +0000 @@ -97,4 +97,3 @@ --disconnect node_1_p2 --disconnect node_2_p1 --disconnect node_2_p2 - diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_checksum.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_checksum.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_checksum.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_checksum.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -9,5 +9,3 @@ binlog-checksum=CRC32 master-verify-checksum=1 slave-sql-verify-checksum=1 - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_checksum.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_checksum.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_checksum.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_checksum.test 2025-05-19 16:14:24.000000000 +0000 @@ -38,8 +38,6 @@ --connection node_1 DROP TABLE t1; ---disable_query_log SET @@global.wsrep_mode=DEFAULT; ---enable_query_log --echo # End of tests. diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_event_max_size_max.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_max.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_event_max_size_max.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_max.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,6 +4,3 @@ binlog-row-event-max-size=4294967040 [mysqld.2] - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,6 +4,3 @@ binlog-row-event-max-size=256 [mysqld.2] - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_event_max_size_min.test 2025-05-19 16:14:24.000000000 +0000 @@ -12,4 +12,3 @@ SELECT COUNT(*) = 1 FROM t1 WHERE f1 = REPEAT('x', 1000); DROP TABLE t1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_row_image.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_row_image.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_row_image.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_row_image.test 2025-05-19 16:14:24.000000000 +0000 @@ -94,7 +94,3 @@ DROP TABLE t1; DROP TABLE t2; - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_stmt_autoinc.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_stmt_autoinc.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_binlog_stmt_autoinc.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_binlog_stmt_autoinc.test 2025-05-19 16:14:24.000000000 +0000 @@ -5,15 +5,15 @@ --source include/galera_cluster.inc --source include/force_restart.inc +--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + --connection node_1 SET GLOBAL auto_increment_offset=1; --connection node_2 SET GLOBAL 
auto_increment_offset=2; ---let $node_1=node_1 ---let $node_2=node_2 ---source include/auto_increment_offset_save.inc - ## ## Verify the correct operation of the auto-increment when the binlog ## format artificially set to the 'STATEMENT' (although this mode is diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_cache_index.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_cache_index.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_cache_index.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_cache_index.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc CREATE TABLE t1 (c1 int, UNIQUE INDEX (c1)) engine=innodb; INSERT INTO t1 VALUES (1),(2),(3),(4),(5); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_can_run_toi.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_can_run_toi.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_can_run_toi.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_can_run_toi.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc # # MDEV-24833 : Signal 11 on wsrep_can_run_in_toi at wsrep_mysqld.cc:1994 # diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_change_user.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_change_user.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_change_user.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_change_user.test 2025-05-19 16:14:24.000000000 +0000 @@ -26,4 +26,3 @@ --connection node_1 DROP TABLE t1; DROP USER user1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_circular_replication.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_circular_replication.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_circular_replication.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_circular_replication.test 2025-05-19 16:14:24.000000000 +0000 @@ -45,6 +45,7 @@ --let $node_1 = replica1 --let $node_2 = node_2 --let $node_3 = primary2 +--let $node_4 = primary1 --source include/auto_increment_offset_save.inc --connection replica1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_concurrent_ctas.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_concurrent_ctas.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_concurrent_ctas.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_concurrent_ctas.test 2025-05-19 16:14:24.000000000 +0000 @@ -98,4 +98,3 @@ --source include/galera_end.inc --echo # End of test - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_create_trigger.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_create_trigger.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_create_trigger.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_create_trigger.test 2025-05-19 16:14:24.000000000 +0000 @@ -41,4 +41,3 @@ DROP TABLE definer_default; DROP USER 'user1'; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ctas.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ctas.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ctas.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ctas.test 2025-05-19 16:14:24.000000000 +0000 
@@ -1,4 +1,6 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_aria.inc --connection node_1 create table t1_Aria(a int, count int, b int, key(b)) engine=Aria; @@ -36,4 +38,3 @@ DROP TABLE t2, t3,t4; DROP TABLE t1_MyISAM, t1_Aria,t1_InnoDB; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ddl_fk_conflict.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_fk_conflict.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ddl_fk_conflict.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_fk_conflict.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,5 +7,3 @@ [mysqld.2] wsrep-debug=1 loose-galera-ddl-fk-conflict=1 - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ddl_fk_conflict.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_fk_conflict.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ddl_fk_conflict.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_fk_conflict.test 2025-05-19 16:14:24.000000000 +0000 @@ -43,4 +43,3 @@ --source galera_ddl_fk_conflict_with_tmp.inc # CHECK and ANALYZE are not affected - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ddl_multiline.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_multiline.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ddl_multiline.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ddl_multiline.test 2025-05-19 16:14:24.000000000 +0000 @@ -51,4 +51,3 @@ --connection node_1 DROP TABLE t1, t2, t3, t4, t5, t6; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_defaults.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_defaults.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_defaults.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_defaults.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_defaults.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_defaults.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_defaults.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_defaults.test 2025-05-19 16:14:24.000000000 +0000 @@ -13,11 +13,13 @@ --source include/force_restart.inc # Make sure that the test is operating on the right version of galera library. 
---let $galera_version=26.4.11 +--let $galera_version=26.4.21 source ../wsrep/include/check_galera_version.inc; # Global Variables +SELECT COUNT(*) `expect 51` FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME LIKE 'wsrep_%'; + SELECT VARIABLE_NAME, VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME LIKE 'wsrep_%' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_disallow_local_gtid.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_disallow_local_gtid.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_disallow_local_gtid.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_disallow_local_gtid.test 2025-05-19 16:14:24.000000000 +0000 @@ -101,4 +101,3 @@ DROP TABLE tab1; DROP TABLE tab2; DROP TABLE tab3; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_encrypt_tmp_files.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_encrypt_tmp_files.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_encrypt_tmp_files.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_encrypt_tmp_files.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ !include ../galera_2nodes.cnf -[mysqld] +[mysqld] encrypt-tmp-files = 1 plugin-load-add= @ENV.FILE_KEY_MANAGEMENT_SO file-key-management diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_fk_truncate.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_fk_truncate.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_fk_truncate.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_fk_truncate.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,9 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +auto_increment_offset=1 +auto_increment_increment=1 + +[mysqld.2] +auto_increment_offset=2 +auto_increment_increment=1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_flush_local.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_flush_local.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_flush_local.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_flush_local.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,7 @@ # PXC-391 --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_query_cache.inc --disable_warnings @@ -72,7 +73,6 @@ SELECT COUNT(*) AS EXPECT_10000 FROM t2; SELECT COUNT(*) AS EXPECT_10 FROM x2; - --connection node_1 DROP TABLE t1, t2, x1, x2; CREATE TABLE t1 (f1 INTEGER); @@ -144,4 +144,3 @@ --disable_query_log SET GLOBAL wsrep_mode = DEFAULT; --enable_query_log - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_forced_binlog_format.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_forced_binlog_format.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_forced_binlog_format.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_forced_binlog_format.test 2025-05-19 16:14:24.000000000 +0000 @@ -49,4 +49,3 @@ #--source include/galera_end.inc --echo # End of tests - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ !include ../galera_2nodes.cnf [mysqld.1] 
-wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.recover=yes;pc.ignore_sb=true' +wsrep_provider_options='gcache.recover=yes;gcache.size=128M;pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;pc.wait_prim_timeout=PT60S' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.recover=yes' +wsrep_provider_options='gcache.recover=yes;gcache.size=128M;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;pc.wait_prim_timeout=PT60S' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,7 @@ # --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/big_test.inc CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -3,7 +3,7 @@ [mysqld.1] max_allowed_packet=10M innodb_log_file_size=220M -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.recover=yes;pc.ignore_sb=true;gcache.size=10M' +wsrep_provider_options='gcache.recover=yes;gcache.size=10M;pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;pc.wait_prim_timeout=PT60S' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.recover=yes;pc.ignore_sb=true;gcache.size=10M' +wsrep_provider_options='gcache.recover=yes;gcache.size=10M;pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;pc.wait_prim_timeout=PT60S' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_full_gcache.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,7 @@ # --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/big_test.inc SET SESSION wsrep_sync_wait = 0; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -2,8 +2,8 @@ [mysqld.1] innodb_log_file_size=220M -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.recover=yes;pc.ignore_sb=true;' +wsrep_provider_options='gcache.recover=yes;gcache.size=128M;pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;pc.wait_prim_timeout=PT60S' [mysqld.2] innodb_log_file_size=220M -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.recover=yes;pc.ignore_sb=true;' 
+wsrep_provider_options='gcache.recover=yes;gcache.size=128M;pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;pc.wait_prim_timeout=PT60S' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcache_recover_manytrx.test 2025-05-19 16:14:24.000000000 +0000 @@ -5,6 +5,7 @@ --source include/galera_cluster.inc --source include/big_test.inc +--source include/have_innodb.inc --source include/have_log_bin.inc SET SESSION wsrep_sync_wait = 0; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcs_fragment.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcs_fragment.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcs_fragment.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcs_fragment.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,7 @@ !include ../galera_2nodes.cnf + [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcs.max_packet_size=64' +wsrep_provider_options='gcs.max_packet_size=64;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' + [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcs.max_packet_size=64' +wsrep_provider_options='gcs.max_packet_size=64;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcs_max_packet_size.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcs_max_packet_size.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gcs_max_packet_size.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gcs_max_packet_size.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,7 @@ !include ../galera_2nodes.cnf + [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcs.max_packet_size=64;gcache.size=10M' +wsrep_provider_options='gcs.max_packet_size=64;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' + [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcs.max_packet_size=64;gcache.size=10M' +wsrep_provider_options='gcs.max_packet_size=64;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,4 +7,3 @@ [mysqld.2] log-bin log-slave-updates - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_server_id.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_server_id.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_server_id.cnf 
2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_server_id.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -13,4 +13,3 @@ server-id=12 log_slave_updates log_bin - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_slave.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_slave.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,13 +4,16 @@ log-bin=mysqld-bin log-slave-updates binlog-format=ROW + [mysqld.1] gtid-domain-id=1 wsrep_gtid_mode=1 wsrep_gtid_domain_id=1 + [mysqld.2] gtid-domain-id=1 wsrep_gtid_mode=1 wsrep_gtid_domain_id=1 + [mysqld.3] gtid-domain-id=2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,14 +5,16 @@ log-slave-updates binlog-format=ROW wsrep_sst_method=rsync + [mysqld.1] gtid-domain-id=1 wsrep_gtid_mode=1 wsrep_gtid_domain_id=1 + [mysqld.2] gtid-domain-id=1 wsrep_gtid_mode=1 wsrep_gtid_domain_id=1 + [mysqld.3] gtid-domain-id=2 - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_slave_sst_rsync.test 2025-05-19 16:14:24.000000000 +0000 @@ -13,6 +13,7 @@ # As node #3 is not a Galera node, and galera_cluster.inc does not open connetion to it # we open the node_3 connection here --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + --echo #Connection 2 --connection node_2 --disable_query_log @@ -30,6 +31,7 @@ SELECT @@global.gtid_binlog_state; --source include/save_master_gtid.inc + --echo #Connection 2 --connection node_2 --source include/sync_with_master_gtid.inc @@ -39,6 +41,7 @@ INSERT INTO t2 VALUES(5,55); INSERT INTO t2 VALUES(6,66); SELECT @@global.gtid_binlog_state; + --echo #Connection 1 --connection node_1 --let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME= 't2'; @@ -60,6 +63,7 @@ INSERT INTO t1 VALUES ('node1_committed_before'); COMMIT; --source include/save_master_gtid.inc + --echo #Connection 2 --connection node_2 --source include/sync_with_master_gtid.inc @@ -68,6 +72,7 @@ INSERT INTO t1 VALUES ('node2_committed_before'); INSERT INTO t1 VALUES ('node2_committed_before'); COMMIT; + --echo #Connection 1 --connection node_1 --let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME= 't1'; @@ -77,10 +82,12 @@ --let $node_1= node_1 --let $node_2= node_2 --source include/auto_increment_offset_save.inc + --echo #Connection 2 --connection node_2 --echo Shutting down server ... 
--source include/shutdown_mysqld.inc + --echo #Connection 1 --connection node_1 --let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' @@ -90,6 +97,7 @@ INSERT INTO t1 VALUES ('node1_committed_during'); INSERT INTO t1 VALUES ('node1_committed_during'); COMMIT; + --echo #Connection 2 --connection node_2 --echo Starting server ... @@ -103,11 +111,13 @@ INSERT INTO t1 VALUES ('node2_committed_after'); INSERT INTO t1 VALUES ('node2_committed_after'); COMMIT; + --echo #Connection 1 --connection node_1 --let $wait_condition = SELECT COUNT(*) = 8 FROM t1; --source include/wait_condition.inc Select * from t1 order by f1; + --echo #Connection 2 --connection node_2 Select * from t1 order by f1; @@ -153,12 +163,14 @@ INSERT INTO t1 VALUES ('node2_slave_started'); SELECT count(*) from t1; SELECT @@global.gtid_binlog_state; + --echo #Connection 1 --connection node_1 --let $wait_condition = SELECT COUNT(*) = 12 FROM t1; --source include/wait_condition.inc SELECT count(*) from t1; SELECT @@global.gtid_binlog_state; + --echo #Connection 3 --connection node_3 DROP TABLE t2,t1; @@ -173,10 +185,12 @@ --connection node_2 --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2'; --source include/wait_condition.inc + --echo #Connection 1 --connection node_1 --let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc + --echo #Connection 2 --connection node_2 STOP SLAVE; @@ -194,6 +208,7 @@ set global wsrep_on=OFF; reset master; set global wsrep_on=ON; + --echo #Connection 3 --connection node_3 reset master; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_trx_conflict.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_trx_conflict.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_gtid_trx_conflict.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_gtid_trx_conflict.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,10 +4,12 @@ log-bin=mysqld-bin log-slave-updates binlog-format=ROW + [mysqld.1] gtid-domain-id=1 wsrep_gtid_mode=1 wsrep_gtid_domain_id=1 + [mysqld.2] gtid-domain-id=1 wsrep_gtid_mode=1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_inject_bf_long_wait.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_inject_bf_long_wait.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_inject_bf_long_wait.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_inject_bf_long_wait.test 2025-05-19 16:14:24.000000000 +0000 @@ -6,9 +6,14 @@ INSERT INTO t1 VALUES (0,0),(1,1),(2,2),(3,3); BEGIN; +SET DEBUG_SYNC = 'wsrep_after_statement_enter SIGNAL blocked'; --send UPDATE t1 set b = 100 where id between 1 and 2; --connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1 + +SET DEBUG_SYNC = 'now WAIT_FOR blocked'; +SET DEBUG_SYNC = 'wsrep_after_statement_enter CLEAR'; + --connection node_1b SET @save_dbug = @@SESSION.debug_dbug; SET @@SESSION.innodb_lock_wait_timeout=2; @@ -21,5 +26,6 @@ --reap COMMIT; SELECT * FROM t1; +SET DEBUG_SYNC = 'RESET'; --disconnect node_1b DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_insert_ignore.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_insert_ignore.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_insert_ignore.test 2025-01-30 11:01:23.000000000 +0000 +++ 
mariadb-10.11.13/mysql-test/suite/galera/t/galera_insert_ignore.test 2025-05-19 16:14:24.000000000 +0000 @@ -57,4 +57,3 @@ DROP TABLE t2; DROP TABLE t3; --eval SET GLOBAL wsrep_sync_wait = $wsrep_sync_wait_orig - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_insert_multi.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_insert_multi.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_insert_multi.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_insert_multi.test 2025-05-19 16:14:24.000000000 +0000 @@ -113,10 +113,3 @@ SELECT COUNT(*) = 2 FROM t1; DROP TABLE t1; - - - - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_MDEV-28423.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_MDEV-28423.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_MDEV-28423.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_MDEV-28423.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -35,10 +35,10 @@ log_bin=binlog [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_MDEV-28583.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_MDEV-28583.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_MDEV-28583.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_MDEV-28583.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -35,10 +35,10 @@ log_bin=binlog [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mariabackup.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mariabackup.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,10 +5,10 @@ wsrep_sst_auth=root: [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] 
-wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mariabackup_innodb_flush_logs.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup_innodb_flush_logs.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mariabackup_innodb_flush_logs.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup_innodb_flush_logs.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,10 +7,10 @@ innodb_flush_log_at_trx_commit=0 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mariabackup_verify_ca.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup_verify_ca.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mariabackup_verify_ca.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mariabackup_verify_ca.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -9,11 +9,11 @@ ssl-ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' loose-innodb-log-file-buffering [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' loose-innodb-log-file-buffering [sst] diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mysqldump.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mysqldump.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mysqldump.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mysqldump.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,7 +4,7 @@ # causes the first MTR connection to be forefully dropped by Galera, which in turn confuses MTR [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] 
-wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mysqldump.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mysqldump.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_mysqldump.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_mysqldump.test 2025-05-19 16:14:24.000000000 +0000 @@ -2,14 +2,14 @@ --source include/galera_cluster.inc --source include/have_innodb.inc ---source suite/galera/include/galera_sst_set_mysqldump.inc - call mtr.add_suppression("WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to "); --let $node_1=node_1 --let $node_2=node_2 --source include/auto_increment_offset_save.inc +--source suite/galera/include/galera_sst_set_mysqldump.inc + # mysql-wsrep#33 - nnoDB: Failing assertion: xid_seqno > trx_sys_cur_xid_seqno in trx_sys_update_wsrep_checkpoint with mysqldump IST # --source suite/galera/include/galera_st_disconnect_slave.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_progress.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_progress.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_progress.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_progress.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,4 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' - - - +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_recv_bind.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_recv_bind.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_recv_bind.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_recv_bind.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,8 +1,7 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;ist.recv_bind=127.0.0.1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;ist.recv_bind=127.0.0.1;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;ist.recv_bind=127.0.0.1' - +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;ist.recv_bind=127.0.0.1;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_restart_joiner.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_restart_joiner.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_restart_joiner.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_restart_joiner.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,4 @@ !include ../galera_2nodes.cnf [mysqld.1] 
-wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_rsync.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_rsync.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_rsync.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_rsync.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,9 +4,9 @@ wsrep_sst_method=rsync [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sync_wait=1 [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sync_wait=1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_rsync_verify_ca.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_rsync_verify_ca.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ist_rsync_verify_ca.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ist_rsync_verify_ca.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -8,10 +8,10 @@ ssl-ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] ssl-mode=VERIFY_CA diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_kill_applier.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_kill_applier.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_kill_applier.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_kill_applier.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,9 @@ !include ../galera_2nodes.cnf [mysqld.1] +wsrep_slave_threads=1 wsrep-debug=1 [mysqld.2] +wsrep_slave_threads=1 wsrep-debug=1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_kill_smallchanges.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_kill_smallchanges.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_kill_smallchanges.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_kill_smallchanges.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,7 @@ # --source include/galera_cluster.inc +--source include/have_innodb.inc # Save original auto_increment_offset values. 
--let $node_1=node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_load_data.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_load_data.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_load_data.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_load_data.test 2025-05-19 16:14:24.000000000 +0000 @@ -397,4 +397,3 @@ --connection node_1 use test; drop database cardtest02; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_log_bin_opt.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_log_bin_opt.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_log_bin_opt.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_log_bin_opt.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,10 +5,10 @@ wsrep_sst_auth="root:" [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_many_rows.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_many_rows.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_many_rows.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_many_rows.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,6 @@ --source include/big_test.inc --source include/galera_cluster.inc +--source include/have_innodb.inc # Save original auto_increment_offset values. 
--let $node_1=node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdev_13787.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_13787.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdev_13787.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_13787.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,5 +5,3 @@ [mysqld.2] innodb-stats-persistent=1 - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdev_13787.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_13787.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdev_13787.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_13787.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,6 @@ --source include/galera_cluster.inc --source include/have_innodb.inc + --connection node_1 create table t(a int); insert into t select 1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdev_15611.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_15611.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdev_15611.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdev_15611.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ !include ../galera_2nodes.cnf + [mysqld.1] [mysqld.2] diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdl_race.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdl_race.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_mdl_race.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_mdl_race.test 2025-05-19 16:14:24.000000000 +0000 @@ -91,4 +91,3 @@ --disconnect node_1a --disconnect node_1b --disconnect node_1c - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_nonPK_and_PA.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_nonPK_and_PA.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_nonPK_and_PA.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_nonPK_and_PA.test 2025-05-19 16:14:24.000000000 +0000 @@ -26,7 +26,6 @@ --source include/have_debug_sync.inc --source include/galera_have_debug_sync.inc - # Setup CREATE TABLE t1 (f1 VARCHAR(32) NOT NULL) ENGINE=InnoDB; @@ -44,7 +43,7 @@ SET GLOBAL wsrep_slave_threads = 2; --echo *************************************************************** ---echo scenario 1, conflicting UPDATE +--echo scenario 1, conflicting UPDATE --echo *************************************************************** # Set up a synchronization point to catch the first transaction @@ -99,9 +98,8 @@ --source include/galera_signal_sync_point.inc --source include/galera_clear_sync_point.inc - --echo *************************************************************** ---echo scenario 2, conflicting DELETE +--echo scenario 2, conflicting DELETE --echo *************************************************************** # Set up a synchronization point to catch the first transaction @@ -164,5 +162,6 @@ DROP TABLE t1; DROP TABLE t2; + --connection node_2 SET GLOBAL wsrep_slave_threads = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_nopk_unicode.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_nopk_unicode.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_nopk_unicode.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_nopk_unicode.test 2025-05-19 16:14:24.000000000 +0000 @@ -39,5 +39,4 @@ SELECT f1 = 
'текст2' FROM t1; SELECT f1 = 'текст2' FROM t1 WHERE f1 = 'текст2'; - DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_apply_lock_table.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_apply_lock_table.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_apply_lock_table.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_apply_lock_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -29,11 +29,13 @@ --let $galera_connection_name = node_2a --let $galera_server_number = 2 --source include/galera_connect.inc + --connection node_2a --sleep 1 SET SESSION wsrep_sync_wait=0; -SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE (STATE LIKE 'Commit' or STATE = 'Waiting for certification'); -SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE '%Waiting for table metadata lock%'; +SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE '%committing%' OR STATE LIKE 'Commit' OR STATE LIKE 'Waiting for certification'); +SELECT COUNT(*) AS EXPECT_1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); + SELECT COUNT(*) AS EXPECT_0 FROM t1; SELECT COUNT(*) AS EXPECT_0 FROM t2; @@ -44,8 +46,11 @@ --eval SET SESSION wsrep_sync_wait = $wsrep_sync_wait_orig; SELECT COUNT(*) AS EXPECT_1 FROM t1; SELECT COUNT(*) AS EXPECT_1 FROM t2; -SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE (STATE LIKE '%committed%' or STATE = 'Waiting for certification'); +SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE '%committed%' OR STATE LIKE 'Waiting for certification'); +--disable_query_log --eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig; +--enable_query_log + DROP TABLE t1; DROP TABLE t2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_autoinc_largetrx.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_autoinc_largetrx.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_autoinc_largetrx.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_autoinc_largetrx.test 2025-05-19 16:14:24.000000000 +0000 @@ -67,4 +67,3 @@ --connection default DROP TABLE t1; DROP TABLE ten; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_autoinc_manytrx.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_autoinc_manytrx.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_autoinc_manytrx.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_autoinc_manytrx.test 2025-05-19 16:14:24.000000000 +0000 @@ -91,5 +91,3 @@ DROP TABLE t1; DROP TABLE ten; DROP PROCEDURE p1; - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_simple.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_simple.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_parallel_simple.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_parallel_simple.test 2025-05-19 16:14:24.000000000 +0000 @@ -48,10 +48,10 @@ --connection node_2 SET SESSION wsrep_sync_wait = 0; ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 'Waiting for table metadata lock%'; 
+--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE 'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'); --source include/wait_condition.inc ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE (STATE LIKE 'Commit' or STATE = 'Waiting for certification'); +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE '%committing%' OR STATE LIKE 'Commit' OR STATE LIKE 'Waiting for certification'); --source include/wait_condition.inc UNLOCK TABLES; @@ -61,7 +61,9 @@ SELECT COUNT(*) as expect_20 FROM t1; SELECT COUNT(*) as expect_20 FROM t2; +--disable_query_log --eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig; +--enable_query_log DROP TABLE t1; DROP TABLE t2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_partitioned_tables.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_partitioned_tables.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_partitioned_tables.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_partitioned_tables.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,133 @@ +--source include/galera_cluster.inc +--source include/have_partition.inc +--source include/have_innodb.inc +--source include/have_aria.inc + +call mtr.add_suppression("WSREP: wsrep_mode = STRICT_REPLICATION enabled\\. Storage engine partition for table"); + +--echo # wsrep-mode= DEFAULT +SET GLOBAL wsrep_mode = ""; +SELECT @@wsrep_mode; +CREATE OR REPLACE TABLE t1 (v1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB + PARTITION BY KEY (v1) + PARTITIONS 2; +CREATE OR REPLACE TABLE t2 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM + PARTITION BY KEY (v1) + PARTITIONS 2; +ALTER TABLE t1 ADD COLUMN v2 int; +ALTER TABLE t2 ADD COLUMN v2 int; +INSERT INTO t1 VALUES (1,1),(2,2); +INSERT INTO t2 VALUES (1,1),(2,2); +ALTER TABLE t1 ADD COLUMN v3 int, ENGINE=MyISAM; +ALTER TABLE t2 ADD COLUMN v3 int, ENGINE=Aria; +UPDATE t1 SET v3 = 3; +UPDATE t2 SET v3 = 3; +CREATE INDEX xx1 ON t1(v2); +CREATE INDEX xx2 ON t2(v2); +DROP INDEX xx1 ON t1; +DROP INDEX xx2 ON t2; +TRUNCATE TABLE t1; +TRUNCATE TABLE t2; +RENAME TABLE t1 TO t1_v2; +RENAME TABLE t2 TO t2_v2; +CREATE VIEW x1 AS SELECT * FROM t1_v2; +CREATE VIEW x2 AS SELECT * FROM t2_v2; +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t1 + AFTER INSERT ON t1_v2 FOR EACH ROW + UPDATE t1_v2 SET t1_v2.v3 = t1_v2.v3+1; +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t2 + AFTER INSERT ON t2_v2 FOR EACH ROW + UPDATE t2_v2 SET t2_v2.v3 = t2_v2.v3+1; + +--connection node_2 +SHOW CREATE TABLE t1_v2; +SHOW CREATE TABLE t2_v2; +SHOW CREATE VIEW x1; +SHOW CREATE VIEW x2; + +SELECT * FROM t1_v2; +SELECT * FROM t2_v2; + +--connection node_1 +DROP VIEW x1; +DROP VIEW x2; +DROP TRIGGER increment_before_t1; +DROP TRIGGER increment_before_t2; +DROP TABLE t1_v2; +DROP TABLE t2_v2; + +SET GLOBAL wsrep_mode = ""; +CREATE OR REPLACE TABLE t2 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM + PARTITION BY KEY (v1) + PARTITIONS 2; +--echo # wsrep-mode= STRICT_REPLICATION +SET GLOBAL wsrep_mode = "STRICT_REPLICATION"; +SELECT @@wsrep_mode; +CREATE OR REPLACE TABLE t1 (v1 INT NOT NULL PRIMARY KEY) ENGINE=InnoDB + PARTITION BY KEY (v1) + PARTITIONS 2; +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +CREATE OR REPLACE TABLE t3 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM + PARTITION BY KEY (v1) + PARTITIONS 2; +ALTER 
TABLE t1 ADD COLUMN v2 int; +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +ALTER TABLE t2 ADD COLUMN v2 int; +INSERT INTO t1 VALUES (1,1),(2,2); +INSERT INTO t2 VALUES (1),(2); +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +ALTER TABLE t1 ADD COLUMN v3 int, ENGINE=MyISAM; +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +ALTER TABLE t2 ADD COLUMN v3 int, ENGINE=Aria; +UPDATE t1 SET v2 = v2 + 3; +UPDATE t2 SET v1 = v1 + 3; +CREATE INDEX xx1 ON t1(v2); +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +CREATE INDEX xx2 ON t2(v2); +DROP INDEX xx1 ON t1; +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +DROP INDEX xx2 on t2; +TRUNCATE TABLE t1; +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +TRUNCATE TABLE t2; +# At the moment can't restrict rename +RENAME TABLE t1 TO t1_v2; +RENAME TABLE t2 TO t2_v2; +RENAME TABLE t2_v2 TO t2; +CREATE VIEW x1 AS SELECT * FROM t1_v2; +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +CREATE VIEW x2 AS SELECT * FROM t2; +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t1 + AFTER INSERT ON t1_v2 FOR EACH ROW + UPDATE t1_v2 SET t1_v2.v2 = t1_v2.v2+1; +--error ER_GALERA_REPLICATION_NOT_SUPPORTED +CREATE DEFINER=`root`@`localhost` TRIGGER increment_before_t2 + AFTER INSERT ON t2 FOR EACH ROW + UPDATE t2 SET t2.v1 = t2.v1+1; + +--connection node_2 +SHOW CREATE TABLE t1_v2; +SHOW CREATE TABLE t2; +SHOW CREATE VIEW x1; + +SELECT * FROM t1_v2; +SELECT * FROM t2; + +--connection node_1 +DROP VIEW x1; +DROP TRIGGER increment_before_t1; +DROP TABLE t1_v2; +# We allow dropping table +DROP TABLE t2; +SET GLOBAL wsrep_mode = ""; + +CREATE OR REPLACE TABLE t2 (v1 INT NOT NULL PRIMARY KEY) ENGINE=MyISAM + PARTITION BY KEY (v1) + PARTITIONS 2; +--echo # wsrep-mode= STRICT_REPLICATION +SET GLOBAL wsrep_mode = "STRICT_REPLICATION"; +SELECT @@wsrep_mode; +ALTER TABLE t2 ENGINE=InnoDB; +DROP TABLE t2; + +SET GLOBAL wsrep_mode = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_pc_ignore_sb.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,8 +4,7 @@ wsrep_debug=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' - +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_pc_recovery.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_pc_recovery.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_pc_recovery.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_pc_recovery.test 2025-05-19 16:14:24.000000000 +0000 @@ -33,8 +33,8 @@ # Perform --wsrep-recover and preserve the positions into variables by placing them in $MYSQL_TMP_DIR/galera_wsrep_start_position.inc and then --source'ing it ---exec $MYSQLD --defaults-group-suffix=.1 --defaults-file=$MYSQLTEST_VARDIR/my.cnf --innodb 
--wsrep-recover --log-error=$MYSQL_TMP_DIR/galera_wsrep_recover.1.log > $MYSQL_TMP_DIR/galera_wsrep_recover.1.log 2>&1 ---exec $MYSQLD --defaults-group-suffix=.2 --defaults-file=$MYSQLTEST_VARDIR/my.cnf --innodb --wsrep-recover --log-error=$MYSQL_TMP_DIR/galera_wsrep_recover.2.log > $MYSQL_TMP_DIR/galera_wsrep_recover.2.log 2>&1 +--exec $MYSQLD --defaults-group-suffix=.1 --defaults-file=$MYSQLTEST_VARDIR/my.cnf --wsrep-recover --loose-innodb --log-error=$MYSQL_TMP_DIR/galera_wsrep_recover.1.log > $MYSQL_TMP_DIR/galera_wsrep_recover.1.log 2>&1 +--exec $MYSQLD --defaults-group-suffix=.2 --defaults-file=$MYSQLTEST_VARDIR/my.cnf --wsrep-recover --loose-innodb --log-error=$MYSQL_TMP_DIR/galera_wsrep_recover.2.log > $MYSQL_TMP_DIR/galera_wsrep_recover.2.log 2>&1 --perl use strict; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_query_cache.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_query_cache.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,4 +7,3 @@ [mysqld.2] query_cache_type=1 query_cache_size=1355776 - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_query_cache_invalidate.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache_invalidate.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_query_cache_invalidate.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache_invalidate.test 2025-05-19 16:14:24.000000000 +0000 @@ -29,7 +29,7 @@ --connection node_3 --disable_query_log ---eval CHANGE MASTER TO master_host='127.0.0.1', master_user='root', MASTER_PORT=$NODE_MYPORT_1, master_use_gtid=current_pos +--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_1, master_use_gtid=current_pos; --enable_query_log START SLAVE; --source include/wait_for_slave_to_start.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_query_cache_sync_wait.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache_sync_wait.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_query_cache_sync_wait.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_query_cache_sync_wait.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,4 +7,3 @@ [mysqld.2] query_cache_type=1 query_cache_size=1355776 - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_read_only.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_read_only.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_read_only.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_read_only.test 2025-05-19 16:14:24.000000000 +0000 @@ -48,4 +48,3 @@ SET GLOBAL read_only=FALSE; DROP TABLE t1; DROP USER foo@localhost; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_repl_key_format_flat16.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_repl_key_format_flat16.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_repl_key_format_flat16.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_repl_key_format_flat16.test 2025-05-19 16:14:24.000000000 +0000 @@ -25,7 +25,6 @@ SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 234; SELECT COUNT(*) = 1 FROM t2 WHERE f1 = REPEAT('b', 256); - --disable_query_log --eval SET GLOBAL wsrep_provider_options = '$wsrep_provider_options_orig'; --enable_query_log diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/t/galera_restart_nochanges.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_nochanges.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_restart_nochanges.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_nochanges.test 2025-05-19 16:14:24.000000000 +0000 @@ -37,4 +37,3 @@ --source include/auto_increment_offset_restore.inc --source include/galera_end.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_restart_replica.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_replica.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_restart_replica.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_replica.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -17,4 +17,3 @@ wsrep_gtid_domain_id=16 gtid_domain_id=11 gtid_strict_mode=1 - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_restart_replica.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_replica.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_restart_replica.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_restart_replica.test 2025-05-19 16:14:24.000000000 +0000 @@ -40,6 +40,7 @@ --let $node_1 = node_1 --let $node_2 = replica +--let $node_3 = primary --source include/auto_increment_offset_save.inc --connection replica diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_savepoint_replay.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_savepoint_replay.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_savepoint_replay.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_savepoint_replay.test 2025-05-19 16:14:24.000000000 +0000 @@ -83,4 +83,3 @@ SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'c'; DROP TABLE t1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequence_engine.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequence_engine.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequence_engine.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequence_engine.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,13 @@ --source include/galera_cluster.inc --source include/have_sequence.inc +--connection node_2 +let $restore_wsrep_ignore_apply_errors=`SELECT @@GLOBAL.wsrep_ignore_apply_errors`; +SET GLOBAL wsrep_ignore_apply_errors=0; + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +SET SESSION wsrep_sync_wait=0; SET GLOBAL wsrep_ignore_apply_errors=0; SET SESSION AUTOCOMMIT=0; SET SESSION max_error_count=0; @@ -11,6 +18,8 @@ --error ER_NO_SUCH_TABLE SHOW CREATE TABLE t0; ---connection node_1 -SET GLOBAL wsrep_ignore_apply_errors=DEFAULT; +--disable_query_log +--eval SET GLOBAL wsrep_ignore_apply_errors=$restore_wsrep_ignore_apply_errors +--enable_query_log +--disconnect node_2a diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,13 +1,9 @@ !include ../galera_2nodes.cnf [mysqld.1] -log-bin -log-slave-updates auto-increment-increment=2 auto-increment-offset=1 [mysqld.2] -log-bin -log-slave-updates 
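# NB (not part of the patch): the log-bin/log-slave-updates settings removed from this .cnf
# are not lost; they reappear in the new galera_sequences.combinations file just below, so
# MTR now runs the test twice, once as [binlogon] and once as [binlogoff].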
auto-increment-increment=2 auto-increment-offset=2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences.combinations mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.combinations --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences.combinations 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.combinations 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,5 @@ +[binlogon] +log-bin +log-slave-updates + +[binlogoff] diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,9 @@ --source include/galera_cluster.inc --source include/have_innodb.inc +--source include/have_sequence.inc +--source include/have_aria.inc +--disable_ps2_protocol # # MDEV-19353 : Alter Sequence do not replicate to another nodes with in Galera Cluster # @@ -45,6 +48,7 @@ select NEXT VALUE FOR Seq1_1; --connection node_1 +SHOW CREATE SEQUENCE Seq1_1; DROP SEQUENCE Seq1_1; # @@ -316,6 +320,12 @@ DROP TABLE t1; DROP SEQUENCE t; +--connection node_2 +--let $wsrep_sync_wait_orig_2 = `SELECT @@wsrep_sync_wait` +SET SESSION wsrep_sync_wait=15; + +--connection node_1 + CREATE SEQUENCE t INCREMENT BY 0 CACHE=20 ENGINE=INNODB; CREATE TABLE t1(a int not null primary key default nextval(t), b int) engine=innodb; # @@ -338,6 +348,10 @@ SELECT * FROM t1; SELECT NEXTVAL(t); +--disable_query_log +--eval SET SESSION wsrep_sync_wait = $wsrep_sync_wait_orig_2 +--enable_query_log + --connection node_1 DROP TABLE t1; DROP SEQUENCE t; @@ -355,4 +369,17 @@ ALTER SEQUENCE IF EXISTS t MINVALUE=1; DROP TABLE t; + +--echo +--echo MDEV-32631: +--echo + +CREATE OR REPLACE TABLE t1(c INT ) ENGINE=ARIA; +SET SESSION WSREP_OSU_METHOD=RSU; +--error ER_NOT_SUPPORTED_YET +INSERT INTO t1 SELECT seq,concat(seq,1) FROM seq_1_to_100; +SET SESSION WSREP_OSU_METHOD=TOI; +DROP TABLE t1; + +--echo --echo End of 10.5 tests diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_bf_kill.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_bf_kill.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,9 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +auto-increment-increment=2 +auto-increment-offset=1 + +[mysqld.2] +auto-increment-increment=2 +auto-increment-offset=2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_bf_kill.combinations mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.combinations --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_bf_kill.combinations 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.combinations 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,5 @@ +[binlogon] +log-bin +log-slave-updates + +[binlogoff] diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_bf_kill.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_bf_kill.test 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_bf_kill.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,115 @@ +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc + +--disable_ps2_protocol +# +# We create an InnoDB sequence with a small cache that is then +# used as the default value for a column in a table. +# +--connection node_1 +--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1, 0), (3, 0); +--connection node_1 +START TRANSACTION; +INSERT INTO t1 VALUES (4, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (5, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (6, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (7, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (8, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (9, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (10, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (11, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (12, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (13, next value for s); # No conflict in cert +INSERT INTO t1 VALUES (14, next value for s); # No conflict in cert +SELECT * FROM t1 WHERE f1 > 0 FOR UPDATE; # Should cause GAP lock between 1 and 3 + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +SET SESSION wsrep_sync_wait=0; +# Block the applier on node #1 and issue a conflicting update on node #2 +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc + +# +# Send conflicting INSERT +# +--connection node_2 +INSERT INTO t1 VALUES (2, 2); # This should BF abort because of GAP lock + +--connection node_1a +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +# Block the commit, send the COMMIT and wait until it gets blocked +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_set_sync_point.inc + +--connection node_1 +--send COMMIT + +--connection node_1a + +--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +--let $galera_sync_point = abort_trx_end +--source include/galera_set_sync_point.inc +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc +--let $galera_sync_point = abort_trx_end commit_monitor_master_enter_sync +--source include/galera_wait_sync_point.inc + +# Let the transactions proceed +--source include/galera_clear_sync_point.inc +--let $galera_sync_point = abort_trx_end +--source include/galera_signal_sync_point.inc +--let $galera_sync_point = commit_monitor_master_enter_sync +--source include/galera_signal_sync_point.inc + +# Commit succeeds +--connection node_1 +--reap + +# wsrep_local_replays has increased by 1 +--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'` +--disable_query_log +--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays; +--enable_query_log + +INSERT
INTO t1 VALUES (22, next value for s); +INSERT INTO t1 VALUES (23, next value for s); +INSERT INTO t1 VALUES (24, next value for s); +INSERT INTO t1 VALUES (25, next value for s); +INSERT INTO t1 VALUES (26, next value for s); +INSERT INTO t1 VALUES (27, next value for s); +INSERT INTO t1 VALUES (28, next value for s); +INSERT INTO t1 VALUES (29, next value for s); +INSERT INTO t1 VALUES (30, next value for s); +INSERT INTO t1 VALUES (31, next value for s); +INSERT INTO t1 VALUES (32, next value for s); +INSERT INTO t1 VALUES (33, next value for s); +INSERT INTO t1 VALUES (34, next value for s); +INSERT INTO t1 VALUES (35, next value for s); + +--connection node_1 +SELECT * FROM t1; +SELECT LASTVAL(s); + +--connection node_2 +SELECT * FROM t1; +SELECT LASTVAL(s); + +--connection node_1 +SELECT NEXTVAL(s); + +--connection node_2 +SELECT NEXTVAL(s); + +DROP SEQUENCE s; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_transaction.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_transaction.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,9 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +auto-increment-increment=2 +auto-increment-offset=1 + +[mysqld.2] +auto-increment-increment=2 +auto-increment-offset=2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_transaction.combinations mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.combinations --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_transaction.combinations 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.combinations 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,5 @@ +[binlogon] +log-bin +log-slave-updates + +[binlogoff] diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_transaction.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sequences_transaction.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sequences_transaction.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,255 @@ +--source include/galera_cluster.inc +--source include/have_sequence.inc + +--disable_ps2_protocol +# +# Case 1: Separate transactions from few connections +# +--connection node_1 +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY DEFAULT NEXTVAL(s), f2 INT) ENGINE=InnoDB; + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 + +--connection node_1 +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; + +--connection node_2 +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; + 
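# The new galera_sequences_transaction.test cases here all exercise one pattern: an InnoDB
# sequence with a small cache backing a column default. A minimal standalone sketch (not
# part of the patch; sequence/table names mirror the test) of what the pattern relies on:
#
#   CREATE SEQUENCE s INCREMENT BY 0 CACHE=5 ENGINE=InnoDB;
#   CREATE TABLE t1 (f1 INT PRIMARY KEY DEFAULT NEXTVAL(s), f2 INT) ENGINE=InnoDB;
#   INSERT INTO t1(f2) VALUES (1); -- f1 is drawn from s
#   SELECT LASTVAL(s);             -- session-local: NULL in a session that never called NEXTVAL
#   SELECT NEXTVAL(s);             -- advances s; sequence values are not rolled back
#
# INCREMENT BY 0 makes the sequence follow auto_increment_increment/auto_increment_offset,
# which is why the .cnf files above give each node a distinct offset.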
+--connection node_2a +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; + +--connection node_1a +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +COMMIT; + +--connection node_2 +SELECT LASTVAL(s); +--connection node_1 +SELECT LASTVAL(s); +--connection node_2a +SELECT LASTVAL(s); +--connection node_1a +SELECT LASTVAL(s); + +--connection node_1 +SELECT * FROM t1; +--connection node_2 +SELECT * FROM t1; + +--connection node_1 +DROP TABLE t1; +DROP SEQUENCE s; + +# +# Case 2: All rollback +# +--connection node_1 +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY DEFAULT NEXTVAL(s), f2 INT) ENGINE=InnoDB; + +--connection node_1 +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; + +--connection node_2 +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; + +--connection node_2a +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; + +--connection node_1a +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +ROLLBACK; + +--connection node_2 +SELECT LASTVAL(s); +--connection node_1 +SELECT LASTVAL(s); +--connection node_2a +SELECT LASTVAL(s); +--connection node_1a +SELECT LASTVAL(s); + +--connection node_1 +SELECT * FROM t1; +--connection node_2 +SELECT * FROM t1; + +--connection node_1 +DROP TABLE t1; +DROP SEQUENCE s; +# +# Case 3: Mixed transactions +# +--connection node_1 +CREATE SEQUENCE s INCREMENT=0 CACHE=5 ENGINE=InnoDB; +CREATE TABLE t1 (f1 INT PRIMARY KEY DEFAULT NEXTVAL(s), f2 INT) ENGINE=InnoDB; + +--connection node_1 +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); + +--connection node_1a +BEGIN; +INSERT 
INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); + +--connection node_2a +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); + +--connection node_2 +BEGIN; +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); +INSERT INTO t1(f2) values (1); + +--connection node_1 +COMMIT; +--connection node_1a +ROLLBACK; +--connection node_2 +--error ER_LOCK_DEADLOCK +COMMIT; +--connection node_2a +--error ER_LOCK_DEADLOCK +ROLLBACK; + +--connection node_2 +SELECT LASTVAL(s); +--connection node_1 +SELECT LASTVAL(s); +--connection node_2a +SELECT LASTVAL(s); +--connection node_1a +SELECT LASTVAL(s); + +--connection node_1 +SELECT * FROM t1; +--connection node_2 +SELECT * FROM t1; + +--connection node_1 +DROP TABLE t1; +DROP SEQUENCE s; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_server.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_server.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_server.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_server.test 2025-05-19 16:14:24.000000000 +0000 @@ -25,4 +25,3 @@ --source include/galera_end.inc --echo # End of test - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_set_position_after_cert_failure.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_set_position_after_cert_failure.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_set_position_after_cert_failure.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_set_position_after_cert_failure.test 2025-05-19 16:14:24.000000000 +0000 @@ -95,4 +95,5 @@ --connection node_1 DROP TABLE t1; SET GLOBAL wsrep_slave_threads = DEFAULT; + --source include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_slave_replay.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_slave_replay.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_slave_replay.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_slave_replay.test 2025-05-19 16:14:24.000000000 +0000 @@ -6,6 +6,7 @@ # or rollback and replay (depending on the nature of lock conflict). 
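# A replay, as opposed to a plain rollback, shows up in the status counters;
# galera_sequences_bf_kill above checks for it by differencing
#   SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS
#   WHERE VARIABLE_NAME = 'wsrep_local_replays';
# before and after the blocked COMMIT.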
# +--source include/galera_cluster.inc --source include/have_innodb.inc --source include/have_log_bin.inc --source include/have_debug.inc @@ -13,9 +14,7 @@ --source include/galera_have_debug_sync.inc --connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 - --connection node_2a ---source include/galera_cluster.inc ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sp_bf_abort.inc mariadb-10.11.13/mysql-test/suite/galera/t/galera_sp_bf_abort.inc --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sp_bf_abort.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sp_bf_abort.inc 2025-05-19 16:14:24.000000000 +0000 @@ -35,4 +35,3 @@ --source include/galera_signal_sync_point.inc --let $galera_sync_point = after_replicate_sync --source include/galera_signal_sync_point.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_split_brain.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_split_brain.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_split_brain.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_split_brain.test 2025-05-19 16:14:24.000000000 +0000 @@ -13,6 +13,7 @@ --let $node_2=node_2 --source include/auto_increment_offset_save.inc +--connection node_2 call mtr.add_suppression("WSREP: TO isolation failed for: "); --connection node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sql_log_bin_zero.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sql_log_bin_zero.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sql_log_bin_zero.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sql_log_bin_zero.test 2025-05-19 16:14:24.000000000 +0000 @@ -17,7 +17,6 @@ INSERT INTO t1 VALUES (2); - --connection node_2 SELECT COUNT(*) = 2 FROM t1; SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=10M;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem' +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=10M;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem' +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl.test --- 
mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl.test 2025-05-19 16:14:24.000000000 +0000 @@ -9,6 +9,10 @@ --source include/galera_cluster.inc --source include/have_innodb.inc --source include/big_test.inc +--source include/have_perfschema.inc + +# Verify that SSL is handled by the provider. +SELECT COUNT(*) `expect 0` FROM performance_schema.socket_instances WHERE EVENT_NAME LIKE '%wsrep%'; SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_cipher.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_cipher.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_cipher.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_cipher.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,11 @@ +!include ../galera_2nodes.cnf + +[mysqld] +loose-galera-ssl-cipher=1 +wsrep-debug=1 + +[mysqld.1] +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;cert.log_conflicts=YES;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' + +[mysqld.2] +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;cert.log_conflicts=YES;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_cipher.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_cipher.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_cipher.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_cipher.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,82 @@ +# +# Test upgrading the SSL cipher +# + +--source include/galera_cluster.inc +--source include/have_ssl_communication.inc +--source include/have_openssl.inc +--source include/force_restart.inc + +# +# Lowest supported Galera library version +# +--let $galera_version=26.4.21 +source ../wsrep/include/check_galera_version.inc; + +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + +# Setup galera ports +--connection node_1 +--source suite/galera/include/galera_base_port.inc +--let $NODE_GALERAPORT_1 = $_NODE_GALERAPORT + +--connection node_2 +--source suite/galera/include/galera_base_port.inc +--let $NODE_GALERAPORT_2 = $_NODE_GALERAPORT + +SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; + +# 2. 
Restart node #1 with a socket.ssl_cipher + +--connection node_1 +--source include/shutdown_mysqld.inc +--let $restart_noprint = 1 +--let $start_mysqld_params = --wsrep-cluster-address=gcomm://127.0.0.1:$NODE_GALERAPORT_2 --wsrep_provider_options=base_port=$NODE_GALERAPORT_1;socket.ssl=yes;socket.ssl_ca=$MYSQL_TEST_DIR/std_data/galera-upgrade-ca-cert.pem;socket.ssl_cert=$MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=$MYSQL_TEST_DIR/std_data/galera-key.pem;socket.ssl_cipher=AES256-SHA +--source include/start_mysqld.inc +--source include/wait_until_connected_again.inc + +--let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; +--source include/wait_condition.inc +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; + +# 3. Restart node #2 with the new socket.ssl_ca, socket.ssl_cert, socket.ssl_key and socket.ssl_cipher + +--connection node_2 +--source include/shutdown_mysqld.inc +--let $start_mysqld_params = --wsrep_provider_options=base_port=$NODE_GALERAPORT_2;socket.ssl=yes;socket.ssl_ca=$MYSQL_TEST_DIR/std_data/galera-upgrade-ca-cert.pem;socket.ssl_cert=$MYSQL_TEST_DIR/std_data/galera-upgrade-server-cert.pem;socket.ssl_key=$MYSQL_TEST_DIR/std_data/galera-upgrade-server-key.pem;socket.ssl_cipher=AES256-SHA +--source include/start_mysqld.inc +--source include/wait_until_connected_again.inc + +--let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; +--source include/wait_condition.inc +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; + +# 4. Restart node #1 with the new socket.ssl_ca, socket.ssl_cert, socket.ssl_key and socket.ssl_cipher + +--connection node_1 +--source include/shutdown_mysqld.inc +--let $start_mysqld_params = --wsrep-cluster-address=gcomm://127.0.0.1:$NODE_GALERAPORT_2 --wsrep_provider_options=base_port=$NODE_GALERAPORT_1;socket.ssl=yes;socket.ssl_ca=$MYSQL_TEST_DIR/std_data/galera-upgrade-ca-cert.pem;socket.ssl_cert=$MYSQL_TEST_DIR/std_data/galera-upgrade-server-cert.pem;socket.ssl_key=$MYSQL_TEST_DIR/std_data/galera-upgrade-server-key.pem;socket.ssl_cipher=AES256-SHA +--source include/start_mysqld.inc +--source include/wait_until_connected_again.inc + +--let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; +--source include/wait_condition.inc +SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; + +# 5. Make sure node_2 is ready as well +--connection node_2 +--source include/galera_wait_ready.inc + +# Upgrade complete. Both nodes now use the new key and certificate + +# Restore original auto_increment_offset values. 
+--source include/auto_increment_offset_restore.inc + +--connection node_1 +call mtr.add_suppression("WSREP: write_handler\\(\\)"); +--connection node_2 +call mtr.add_suppression("WSREP: write_handler\\(\\)"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_compression.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_compression.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_compression.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_compression.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem;socket.ssl_compression=YES' +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem;socket.ssl_compression=YES;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem;socket.ssl_compression=YES' +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/cakey.pem;socket.ssl_compression=YES;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_compression.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_compression.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_compression.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_compression.test 2025-05-19 16:14:24.000000000 +0000 @@ -8,6 +8,10 @@ --source include/galera_cluster.inc --source include/have_innodb.inc --source include/big_test.inc +--source include/have_perfschema.inc + +# Verify that SSL is handled by the provider. 
+SELECT COUNT(*) `expect 0` FROM performance_schema.socket_instances WHERE EVENT_NAME LIKE '%wsrep%'; SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_upgrade.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,7 +5,7 @@ wsrep-debug=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem' +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem' +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_upgrade.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_upgrade.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_ssl_upgrade.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_ssl_upgrade.test 2025-05-19 16:14:24.000000000 +0000 @@ -10,6 +10,9 @@ --source include/have_openssl.inc --source include/force_restart.inc +# Verify that SSL is handled by the provider. +SELECT COUNT(*) `expect 0` FROM performance_schema.socket_instances WHERE EVENT_NAME LIKE '%wsrep%'; + # Save original auto_increment_offset values. 
--let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_encrypted.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_encrypted.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_encrypted.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_encrypted.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -11,7 +11,7 @@ wsrep_sst_method=rsync [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,12 +6,12 @@ wsrep_debug=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' innodb_fast_shutdown=0 innodb_undo_tablespaces=0 [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' innodb_fast_shutdown=0 innodb_undo_tablespaces=3 loose_innodb_log_file_buffering diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_data_dir.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_data_dir.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_data_dir.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_data_dir.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,11 +6,11 @@ wsrep_debug=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] innodb_data_home_dir=@ENV.MYSQL_TMP_DIR/data_dir_test -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_force_recovery.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_force_recovery.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_force_recovery.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_force_recovery.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,10 +6,10 @@ wsrep_debug=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,28 @@ +!include ../galera_2nodes.cnf + +[mysqld] +wsrep_sst_method=mariabackup +wsrep_sst_auth="root:" +gtid_strict_mode=ON +wsrep-gtid_mode=ON +log-bin +log-slave_updates +loose-galera-sst-mariabackup-gtid=1 + +[mysqld.1] +wsrep_provider_options='pc.weight=2;pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' +gtid_domain_id=10 +wsrep_gtid_domain_id=100 +wsrep_slave_threads=4 +server-id=10 + +[mysqld.2] +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' +gtid_domain_id=10 +wsrep_gtid_domain_id=100 +wsrep_slave_threads=4 +server-id=10 + +[sst] +transferfmt=@ENV.MTR_GALERA_TFMT +streamfmt=mbstream diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_gtid.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,29 @@ +--source include/big_test.inc +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_mariabackup.inc +--source include/force_restart.inc + +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + +--source suite/galera/include/galera_st_shutdown_slave.inc +--source suite/galera/include/galera_st_clean_slave.inc + +--source suite/galera/include/galera_st_kill_slave.inc +--source suite/galera/include/galera_st_kill_slave_ddl.inc + +# Restore original auto_increment_offset values. 
+--source include/auto_increment_offset_restore.inc + +--connection node_1 +--echo # Node_1 +SHOW global variables like 'gtid%pos'; + +--connection node_2 +--echo # Node_2 +SHOW global variables like 'gtid%pos'; + +--source include/galera_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_logarchive.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_logarchive.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_logarchive.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_logarchive.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,10 +6,10 @@ wsrep_debug=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_lost_found.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_lost_found.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_lost_found.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_lost_found.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,10 +5,10 @@ wsrep_sst_auth="root:" [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_table_options.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_table_options.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_table_options.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_table_options.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,10 +7,10 @@ innodb-file-per-table=ON [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' 
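# (A recurring pattern in the .cnf hunks here and below: because a per-node
# wsrep_provider_options line replaces the suite-wide default string rather than merging
# with it, the patch folds the suite defaults, repl.causal_read_timeout, the evs.*
# timeouts and pc.wait_prim_timeout, into every explicitly set value. The effective
# string can be checked at runtime with
#   SELECT @@GLOBAL.wsrep_provider_options;
# if needed.)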
+wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [sst] transferfmt=@ENV.MTR_GALERA_TFMT diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,4 +5,4 @@ wsrep_sst_auth="root:" [mariabackup] -use_memory=123m +use_memory=129m diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test 2025-05-19 16:14:24.000000000 +0000 @@ -40,8 +40,8 @@ --source include/wait_condition.inc # Confirm that IST did not take place ---let $assert_text = mariabackup: Using 128974848 bytes for buffer pool \(set by --use-memory parameter\) ---let $assert_select = mariabackup: Using 128974848 bytes for buffer pool \(set by --use-memory parameter\) +--let $assert_text = mariabackup: Using 134217728 bytes for buffer pool \(set by --use-memory parameter\) +--let $assert_select = mariabackup: Using 134217728 bytes for buffer pool \(set by --use-memory parameter\) --let $assert_count = 1 --let $assert_file = $MYSQLTEST_VARDIR/mysqld.2/data/mariabackup.prepare.log --let $assert_only_after = Starting InnoDB instance for recovery diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mysqldump.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mysqldump.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mysqldump.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mysqldump.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,6 +4,7 @@ # causes the first MTR connection to be forcefully dropped by Galera, which in turn confuses MTR [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' + [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mysqldump_with_key.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mysqldump_with_key.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_mysqldump_with_key.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_mysqldump_with_key.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -9,10 +9,10 @@ loose-galera_sst_mysqldump_with_key=1 [mysqld.1] 
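# (On the use_memory hunk above: the old expected value 128974848 bytes is exactly 123*2^20,
# matching use_memory=123m verbatim, while the new expected value 134217728 bytes is 128*2^20
# for use_memory=129m; mariabackup in 10.11.13 thus appears to round --use-memory down to the
# buffer-pool chunk granularity, which is why both the setting and the asserted log line change.)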
-wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [client] ssl-ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,7 +4,7 @@ wsrep_sst_method=rsync [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync2.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync2.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync2.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync2.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,11 +4,11 @@ wsrep_sst_method=rsync [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' log_bin=@ENV.MYSQLTEST_VARDIR/mysqld.1/server1_binlog log_bin_index=@ENV.MYSQLTEST_VARDIR/tmp/server1_binlog_index.index [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' log_bin=@ENV.MYSQLTEST_VARDIR/mysqld.2/server2_binlog log_bin_index=@ENV.MYSQLTEST_VARDIR/tmp/server2_binlog_index.index diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_binlogname.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_binlogname.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_binlogname.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_binlogname.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,9 +4,9 @@ wsrep_sst_method=rsync [mysqld.1] 
-wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' log_bin=server1_binlog [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' log_bin=server2_binlog diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_data_dir.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_data_dir.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_data_dir.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_data_dir.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,11 +4,11 @@ wsrep_sst_method=rsync [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' innodb_data_home_dir=@ENV.MYSQL_TMP_DIR/rsync_test_2 -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' [sst] backup_threads=2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_capath.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_capath.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_capath.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_capath.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -14,7 +14,7 @@ ssl-mode=VERIFY_CA [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_key.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_key.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_key.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_key.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -8,7 +8,7 @@ tcert=@ENV.MYSQL_TEST_DIR/std_data/server-cert.pem [mysqld.1] 
-wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_server.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_server.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_server.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_encrypt_with_server.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -10,7 +10,7 @@ ssl-mode=VERIFY_CA [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_gtid.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_gtid.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_gtid.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_gtid.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,23 @@ +!include ../galera_2nodes.cnf + +[mysqld] +wsrep_sst_method=rsync +gtid_strict_mode=ON +wsrep-gtid_mode=ON +log-bin +log-slave_updates +loose-galera-sst-rsync-gtid=1 + +[mysqld.1] +wsrep_provider_options='pc.weight=2;pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' +gtid_domain_id=10 +wsrep_gtid_domain_id=100 +wsrep_slave_threads=4 +server-id=10 + +[mysqld.2] +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' +gtid_domain_id=10 +wsrep_gtid_domain_id=100 +wsrep_slave_threads=4 +server-id=10 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_gtid.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_gtid.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_gtid.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_gtid.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,29 @@ +--source include/big_test.inc +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_mariabackup.inc +--source 
include/force_restart.inc + +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--source include/auto_increment_offset_save.inc + +--source suite/galera/include/galera_st_shutdown_slave.inc +--source suite/galera/include/galera_st_clean_slave.inc + +--source suite/galera/include/galera_st_kill_slave.inc +--source suite/galera/include/galera_st_kill_slave_ddl.inc + +# Restore original auto_increment_offset values. +--source include/auto_increment_offset_restore.inc + +--connection node_1 +--echo # Node_1 +SHOW global variables like 'gtid%pos'; + +--connection node_2 +--echo # Node_2 +SHOW global variables like 'gtid%pos'; + +--source include/galera_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_logbasename.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_logbasename.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_logbasename.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_logbasename.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,11 +4,11 @@ wsrep_sst_method=rsync [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' log_basename=server1 log_bin [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' log_basename=server2 log_bin diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_recv_auto.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_recv_auto.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sst_rsync_recv_auto.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sst_rsync_recv_auto.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,14 +5,14 @@ bind-address=:: [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_incoming_address='[::1]:@mysqld.1.port' wsrep_node_address=::1 wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' [mysqld.2] +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' +wsrep_node_incoming_address='[::1]:@mysqld.2.port' wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' 
-wsrep_provider_options='base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port;gcache.size=1;pc.ignore_sb=true' wsrep_node_address=::1 -wsrep_node_incoming_address='[::1]:@mysqld.2.port' wsrep_sst_receive_address=AUTO diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_status_cluster.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_cluster.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_status_cluster.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_cluster.test 2025-05-19 16:14:24.000000000 +0000 @@ -14,5 +14,3 @@ SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_status_local_index.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_local_index.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_status_local_index.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_local_index.test 2025-05-19 16:14:24.000000000 +0000 @@ -12,7 +12,6 @@ --connection node_2 INSERT INTO wsrep_local_indexes VALUES ((SELECT variable_value FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE variable_name = 'wsrep_local_index')); - --connection node_1 SELECT COUNT(*) = 2 FROM wsrep_local_indexes; SELECT COUNT(DISTINCT wsrep_local_index) = 2 FROM wsrep_local_indexes; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_status_local_state.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_local_state.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_status_local_state.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_status_local_state.test 2025-05-19 16:14:24.000000000 +0000 @@ -22,7 +22,3 @@ --source include/wait_condition.inc SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_strict_require_innodb.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_strict_require_innodb.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_strict_require_innodb.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_strict_require_innodb.test 2025-05-19 16:14:24.000000000 +0000 @@ -15,9 +15,10 @@ # In both cases apply flood control if >= 10 same warning # --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_aria.inc -call mtr.add_suppression("WSREP: wsrep_mode = STRICT_REPLICATION enabled. Storage engine .*"); +call mtr.add_suppression("WSREP: wsrep_mode = STRICT_REPLICATION enabled\\. 
Storage engine "); CREATE TABLE t1(a int NOT NULL PRIMARY KEY, b varchar(50)) ENGINE=INNODB; CREATE TABLE t2(a int NOT NULL PRIMARY KEY, b varchar(50)) ENGINE=MYISAM; @@ -114,4 +115,3 @@ SET GLOBAL log_warnings=DEFAULT; SET GLOBAL wsrep_mode=DEFAULT; --disable_query_log - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_strict_require_primary_key.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_strict_require_primary_key.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_strict_require_primary_key.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_strict_require_primary_key.test 2025-05-19 16:14:24.000000000 +0000 @@ -14,9 +14,10 @@ # In both cases apply flood control if >= 10 same warning # --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_aria.inc -call mtr.add_suppression("WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled. Table .*"); +call mtr.add_suppression("WSREP: wsrep_mode = REQUIRED_PRIMARY_KEY enabled\\. Table "); CREATE TABLE t1(a int, b varchar(50)) ENGINE=INNODB; CREATE TABLE t2(a int, b varchar(50)) ENGINE=MYISAM; @@ -140,4 +141,3 @@ SET GLOBAL log_warnings=DEFAULT; SET GLOBAL wsrep_mode=DEFAULT; --disable_query_log - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_suspend_slave.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_suspend_slave.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_suspend_slave.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_suspend_slave.test 2025-05-19 16:14:24.000000000 +0000 @@ -67,4 +67,3 @@ # Restore original auto_increment_offset values. --let $node_2=node_2a --source include/auto_increment_offset_restore.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sync_wait_upto-master.opt mariadb-10.11.13/mysql-test/suite/galera/t/galera_sync_wait_upto-master.opt --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sync_wait_upto-master.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sync_wait_upto-master.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1 +1 @@ ---wsrep-sync-wait=0 --wsrep-causal-reads=OFF \ No newline at end of file +--wsrep-sync-wait=0 --wsrep-causal-reads=OFF diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_sync_wait_upto.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_sync_wait_upto.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_sync_wait_upto.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_sync_wait_upto.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,8 +3,8 @@ # --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_debug.inc ---source include/have_debug_sync.inc CREATE TABLE t1 (f1 INTEGER) ENGINE=InnoDB; INSERT INTO t1 VALUES (1); @@ -44,7 +44,6 @@ --eval SELECT WSREP_SYNC_WAIT_UPTO_GTID('$wsrep_last_committed_gtid') AS WSREP_SYNC_WAIT_UPTO; --enable_query_log - # Timeout if GTID is not received on time --disable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_table_with_hyphen.inc mariadb-10.11.13/mysql-test/suite/galera/t/galera_table_with_hyphen.inc --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_table_with_hyphen.inc 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_table_with_hyphen.inc 2025-05-19 16:14:24.000000000 +0000 @@ -45,4 +45,3 @@ --connection node_2 --eval drop table `$fk_child` --eval drop table 
`$fk_parent` - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_temporary_sequences.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_temporary_sequences.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_temporary_sequences.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_temporary_sequences.test 2025-05-19 16:14:24.000000000 +0000 @@ -30,7 +30,6 @@ SHOW CREATE TABLE seq1; SHOW CREATE TABLE seq2; - --connection node_1 DROP TABLE t; DROP SEQUENCE seq1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_threadpool.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_threadpool.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_threadpool.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_threadpool.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -13,5 +13,3 @@ log_slave_updates=ON wsrep_sst_method=rsync thread_handling = pool-of-threads - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_threadpool.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_threadpool.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_threadpool.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_threadpool.test 2025-05-19 16:14:24.000000000 +0000 @@ -5,7 +5,6 @@ --let $node_1 = node_1 --let $node_2 = node_2 - --source ../galera/include/auto_increment_offset_save.inc # diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_toi_ddl_nonconflicting.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_toi_ddl_nonconflicting.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_toi_ddl_nonconflicting.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_toi_ddl_nonconflicting.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,43 +1,81 @@ --source include/galera_cluster.inc --source include/have_innodb.inc +--source include/have_sequence.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/galera_have_debug_sync.inc # # In this test, we simultaneously send two non-conflicting ALTER TABLE statements # +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_1 CREATE TABLE t1 (f1 INTEGER PRIMARY KEY AUTO_INCREMENT, f2 INTEGER); +INSERT INTO t1(f2) SELECT seq FROM seq_1_to_1000; ---connection node_2 +--connection node_2a +SET SESSION wsrep_sync_wait=0; --let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; --source include/wait_condition.inc ---send ALTER TABLE t1 ADD COLUMN f3 INTEGER; INSERT INTO t1 (f1, f2) VALUES (DEFAULT, 123); +--let $wait_condition = SELECT COUNT(*) = 1000 FROM t1; +--source include/wait_condition.inc + +--connection node_1a +--echo # Block the applier on node_1 and issue a ddl from node_2 +SET SESSION wsrep_sync_wait=0; +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_set_sync_point.inc +--connection node_2 +--echo # DDL 1 +--send ALTER TABLE t1 ADD COLUMN f3 INTEGER; INSERT INTO t1 VALUES (NULL, 10000, 10000); + +--connection node_1a +--source include/galera_wait_sync_point.inc +--source include/galera_clear_sync_point.inc + +--echo # This will block on acquiring total order isolation --connection node_1 +--echo # DDL 2 --send CREATE UNIQUE INDEX i1 ON t1(f2); +--connection node_1a +--let $wait_condition = SELECT COUNT(*) = 1 FROM 
INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 'acquiring total order%' or STATE LIKE 'Waiting for table metadata%' +--source include/wait_condition.inc + +--echo # Signal DDL 1 +--source include/galera_clear_sync_point.inc +--let $galera_sync_point = apply_monitor_slave_enter_sync +--source include/galera_signal_sync_point.inc + --connection node_2 --reap -INSERT INTO t1 (f1, f2) VALUES (DEFAULT, 234); +--connection node_1 +--reap + +--connection node_2 --let $wait_condition = SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc -SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; -SELECT COUNT(*) = 2 FROM t1; +SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; +SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; +SHOW CREATE TABLE t1; +SELECT COUNT(*) AS EXPECT_1001 FROM t1; --connection node_1 ---reap - --let $wait_condition = SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; --source include/wait_condition.inc -SELECT COUNT(*) = 3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; -SELECT COUNT(*) = 2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; -SELECT COUNT(*) = 2 FROM t1; +SELECT COUNT(*) AS EXPECT_3 FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't1'; +SELECT COUNT(*) AS EXPECT_2 FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_NAME = 't1'; +SHOW CREATE TABLE t1; +SELECT COUNT(*) AS EXPECT_1001 FROM t1; DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_toi_ftwrl.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_toi_ftwrl.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_toi_ftwrl.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_toi_ftwrl.test 2025-05-19 16:14:24.000000000 +0000 @@ -19,4 +19,3 @@ SHOW CREATE TABLE t1; DROP TABLE t1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_transaction_read_only.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_transaction_read_only.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_transaction_read_only.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_transaction_read_only.test 2025-05-19 16:14:24.000000000 +0000 @@ -55,4 +55,3 @@ --enable_query_log DROP TABLE t1; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_udf.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_udf.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_udf.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_udf.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,10 +6,3 @@ [mysqld.2] query_cache_type=1 - - - - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_unicode_identifiers.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_unicode_identifiers.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_unicode_identifiers.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_unicode_identifiers.test 2025-05-19 16:14:24.000000000 +0000 @@ 
-75,4 +75,3 @@ DROP DATABASE `база`; DROP DATABASE `втора база`; --eval SET GLOBAL wsrep_sync_wait = $wsrep_sync_wait_orig - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_v1_row_events.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_v1_row_events.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_v1_row_events.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_v1_row_events.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,10 +4,3 @@ log-bin-use-v1-row-events=1 [mysqld.2] - - - - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_OSU_method2.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_OSU_method2.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_OSU_method2.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_OSU_method2.test 2025-05-19 16:14:24.000000000 +0000 @@ -44,4 +44,3 @@ --connection node_1a SET DEBUG_SYNC= 'RESET'; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test 2025-05-19 16:14:24.000000000 +0000 @@ -94,11 +94,13 @@ --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1 +--disconnect node_1a --connection node_2 --eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig --eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2 --eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2 +--disconnect node_2a --enable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_auto_inc_control_on.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_auto_inc_control_on.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_auto_inc_control_on.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_auto_inc_control_on.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,9 +5,3 @@ [mysqld.2] wsrep-auto-increment-control=ON - - - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_ignore_apply_errors.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_ignore_apply_errors.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_ignore_apply_errors.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_ignore_apply_errors.test 2025-05-19 16:14:24.000000000 +0000 @@ -22,6 +22,8 @@ DROP TABLE t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc SHOW TABLES; # Drop schema that does not exist @@ -33,6 +35,8 @@ DROP SCHEMA s1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME LIKE 's1'; +--source include/wait_condition.inc SHOW SCHEMAS; # Drop index that does not exist using DROP INDEX @@ -45,6 +49,10 @@ DROP INDEX idx1 ON t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source 
include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_SYS_INDEXES WHERE NAME LIKE 'idx1'; +--source include/wait_condition.inc SHOW CREATE TABLE t1; DROP TABLE t1; @@ -58,6 +66,10 @@ ALTER TABLE t1 DROP INDEX idx1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_SYS_INDEXES WHERE NAME LIKE 'idx1'; +--source include/wait_condition.inc SHOW CREATE TABLE t1; DROP TABLE t1; @@ -71,6 +83,11 @@ ALTER TABLE t1 DROP COLUMN f2; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_SYS_COLUMNS WHERE NAME LIKE 'f2'; +--source include/wait_condition.inc + SHOW CREATE TABLE t1; DROP TABLE t1; @@ -93,6 +110,10 @@ SELECT COUNT(*) AS expect_0 FROM t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 0 FROM t1; +--source include/wait_condition.inc SELECT COUNT(*) AS expect_0 FROM t1; DROP TABLE t1; @@ -112,6 +133,10 @@ SELECT COUNT(*) AS expect_1 FROM t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1; +--source include/wait_condition.inc SELECT COUNT(*) AS expect_1 FROM t1; DROP TABLE t1; @@ -136,6 +161,8 @@ SELECT COUNT(*) AS expect_0 FROM t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 0 FROM t1; --source include/wait_condition.inc SELECT VARIABLE_VALUE expect_Primary FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_status'; @@ -171,6 +198,8 @@ SELECT COUNT(*) AS expect_0 FROM t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 0 FROM t1; --source include/wait_condition.inc SELECT VARIABLE_VALUE expect_Primary FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_status'; @@ -202,6 +231,8 @@ SELECT COUNT(*) expect_0 FROM t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 0 FROM t1; --source include/wait_condition.inc SELECT VARIABLE_VALUE = 'Primary' FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_status'; @@ -219,6 +250,10 @@ INSERT INTO child VALUES (1,1),(2,2),(3,3); --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/parent'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/child'; +--source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 3 FROM child; --source 
include/wait_condition.inc @@ -233,6 +268,10 @@ SELECT COUNT(*) AS expect_0 FROM child; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/parent'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/child'; +--source include/wait_condition.inc --let $wait_condition = SELECT COUNT(*) = 0 FROM child; --source include/wait_condition.inc SELECT VARIABLE_VALUE = 'Primary' FROM performance_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_status'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_notify_ssl_ipv6.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_notify_ssl_ipv6.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_notify_ssl_ipv6.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_notify_ssl_ipv6.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,7 +6,7 @@ ssl-ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem [mysqld.1] -wsrep_provider_options='base_host=[::1];base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port;repl.causal_read_timeout=PT90S;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_incoming_address='[::1]:@mysqld.1.port' wsrep_node_address=[::1]:@mysqld.1.#galera_port wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' @@ -14,7 +14,7 @@ [mysqld.2] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port;repl.causal_read_timeout=PT90S;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_incoming_address='[::1]:@mysqld.2.port' wsrep_node_address=[::1]:@mysqld.2.#galera_port wsrep_sst_receive_address='[::1]:@mysqld.2.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_aria_off.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_aria_off.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_aria_off.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_aria_off.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,7 +3,6 @@ # --source include/galera_cluster.inc ---source include/have_innodb.inc --source include/have_aria.inc CREATE TABLE t1 (f1 INT PRIMARY KEY) Engine=Aria; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_aria_on.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_aria_on.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_aria_on.test 2025-01-30 
11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_aria_on.test 2025-05-19 16:14:24.000000000 +0000 @@ -234,4 +234,3 @@ --connection node_2 SET GLOBAL wsrep_mode = DEFAULT; --enable_query_log - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_myisam_off.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_myisam_off.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_myisam_off.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_myisam_off.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,7 +3,6 @@ # --source include/galera_cluster.inc ---source include/have_innodb.inc CREATE TABLE t1 (f1 INT PRIMARY KEY) Engine=MyISAM; INSERT INTO t1 VALUES (1); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_myisam_on.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_myisam_on.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_replicate_myisam_on.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_replicate_myisam_on.test 2025-05-19 16:14:24.000000000 +0000 @@ -21,6 +21,11 @@ INSERT INTO t1 SELECT 4 FROM DUAL UNION ALL SELECT 5 FROM DUAL; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 5 FROM t1; +--source include/wait_condition.inc + SELECT COUNT(*) AS EXPECT_5 FROM t1; DROP TABLE t1; @@ -36,6 +41,13 @@ REPLACE INTO t1 SELECT 3, 'yyy' FROM DUAL; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 3 FROM t1; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 3 AND f2 = 'yyy'; +--source include/wait_condition.inc + SELECT COUNT(*) AS EXPECT_3 FROM t1; SELECT COUNT(*) AS EXPECT_1 FROM t1 WHERE f1 = 1 AND f2 = 'klm'; SELECT COUNT(*) AS EXPECT_1 FROM t1 WHERE f1 = 2 AND f2 = 'xyz'; @@ -49,6 +61,9 @@ UPDATE t1 SET f2 = 'zzz' WHERE f2 = 'yyy'; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1 WHERE f2 = 'zzz'; +--source include/wait_condition.inc + SELECT COUNT(*) AS EXPECT_1 FROM t1 WHERE f2 = 'zzz'; # @@ -59,6 +74,9 @@ DELETE FROM t1 WHERE f2 = 'zzz'; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM t1 WHERE f2 = 'zzz'; +--source include/wait_condition.inc + SELECT COUNT(*) AS EXPECT_0 FROM t1 WHERE f2 = 'zzz'; # @@ -69,6 +87,9 @@ TRUNCATE TABLE t1; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM t1; +--source include/wait_condition.inc + SELECT COUNT(*) AS EXPECT_0 FROM t1; DROP TABLE t1; @@ -77,8 +98,8 @@ # --connection node_1 -CREATE TABLE t1 (f1 INTEGER) ENGINE=MyISAM; -CREATE TABLE t2 (f1 INTEGER) ENGINE=InnoDB; +CREATE TABLE t1 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=MyISAM; +CREATE TABLE t2 (f1 INTEGER NOT NULL PRIMARY KEY) ENGINE=InnoDB; SET AUTOCOMMIT=OFF; START TRANSACTION; INSERT INTO t1 VALUES (1); @@ -86,6 +107,15 @@ COMMIT; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2'; +--source include/wait_condition.inc +--let 
$wait_condition = SELECT COUNT(*) = 1 FROM t1; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM t2; +--source include/wait_condition.inc + SELECT COUNT(*) AS EXPECT_1 FROM t1; SELECT COUNT(*) AS EXPECT_1 FROM t2; @@ -100,6 +130,11 @@ ROLLBACK; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 2 FROM t1; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM t2; +--source include/wait_condition.inc + SELECT COUNT(*) AS EXPECT_2 FROM t1; SELECT COUNT(*) AS EXPECT_1 FROM t2; @@ -119,13 +154,20 @@ INSERT INTO t2 VALUES (1); --connection node_2 -# The MyISAM update is replicated immediately, so a duplicate key error happens even before the COMMIT +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1; +--source include/wait_condition.inc +# The MyISAM update is replicated when executed, so a duplicate key error happens even before the COMMIT --error ER_DUP_ENTRY INSERT INTO t1 VALUES (1); --connection node_1 COMMIT; DROP TABLE t1, t2; + # # Test prepared statements # @@ -146,6 +188,10 @@ SELECT * FROM t1 ORDER BY id; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 11 FROM t1; +--source include/wait_condition.inc SELECT * FROM t1 ORDER BY id; DROP TABLE t1; @@ -172,6 +218,10 @@ SELECT * FROM t1 ORDER BY id; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 11 FROM t1; +--source include/wait_condition.inc SELECT * FROM t1 ORDER BY id; DROP PROCEDURE proc; @@ -195,26 +245,46 @@ SELECT * FROM t2 ORDER BY id; --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2'; +--source include/wait_condition.inc +SELECT COUNT(*) FROM t1; +--let $wait_condition = SELECT COUNT(*) = 10 FROM t1; +--source include/wait_condition.inc SELECT * FROM t1 ORDER BY id; SELECT * FROM t2 ORDER BY id; DROP TRIGGER tr1; DROP TRIGGER tr2; DROP TRIGGER tr3; -DROP TABLE t1,t2; +DROP TABLE t1, t2; + +CREATE TABLE t1 (a INT, b INT, UNIQUE(a)) ENGINE=MyISAM; +CREATE TRIGGER tr1 BEFORE INSERT ON t1 FOR EACH ROW SET NEW.a=1; +INSERT INTO t1 (a,b) VALUES (10,20); +SELECT * from t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) = 1 FROM t1; +--source include/wait_condition.inc + +SELECT * from t1; +--connection node_1 +DROP TABLE t1; --echo # --echo # MDEV-11152: wsrep_replicate_myisam: SELECT gets replicated using TO --echo # --connection node_1 -CREATE TABLE t1 (i INT) ENGINE=INNODB; +CREATE TABLE t1 (i INT NOT NULL PRIMARY KEY) ENGINE=INNODB; INSERT INTO t1 VALUES(1); # This command should not get replicated.
SELECT * FROM t1; DROP TABLE t1; ---connection node_1 ---disable_query_log SET GLOBAL wsrep_mode = DEFAULT; + --connection node_2 SET GLOBAL wsrep_mode = DEFAULT; ---enable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_slave_threads.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_slave_threads.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_slave_threads.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_slave_threads.test 2025-05-19 16:14:24.000000000 +0000 @@ -15,11 +15,15 @@ --connection node_1 --let $wsrep_slave_threads_orig = `SELECT @@wsrep_slave_threads` + CREATE TABLE t1 (f1 INT PRIMARY KEY) Engine=InnoDB; CREATE TABLE t2 (f1 INT AUTO_INCREMENT PRIMARY KEY) Engine=InnoDB; --connection node_2 +--let $wsrep_slave_threads_orig_2 = `SELECT @@wsrep_slave_threads` + CALL mtr.add_suppression("WSREP: Refusing exit for the last slave thread\\."); + # Setting wsrep_slave_threads to zero triggers a warning SET GLOBAL wsrep_slave_threads = 0; SHOW WARNINGS; @@ -74,7 +78,9 @@ --let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_applier_thread_count'; --source include/wait_condition.inc ---eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig +--disable_query_log +--eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig_2; +--enable_query_log DROP TABLE t1; DROP TABLE t2; @@ -94,6 +100,11 @@ SET GLOBAL wsrep_slave_threads = 1; --connection node_1 + +--disable_query_log +--eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig; +--enable_query_log + INSERT INTO t1 VALUES (DEFAULT); INSERT INTO t1 VALUES (DEFAULT); INSERT INTO t1 VALUES (DEFAULT); @@ -106,6 +117,10 @@ --connection node_2 +--disable_query_log +--eval SET GLOBAL wsrep_slave_threads = $wsrep_slave_threads_orig_2; +--enable_query_log + # Wait until above DDL is replicated # # make sure that we are left with exactly one applier thread before leaving the test diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_wsrep_mode.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_wsrep_mode.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_var_wsrep_mode.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_var_wsrep_mode.test 2025-05-19 16:14:24.000000000 +0000 @@ -57,9 +57,3 @@ # reset SET GLOBAL wsrep_mode=DEFAULT; - - - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_during_ist.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_during_ist.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_during_ist.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_during_ist.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,20 @@ +!include ../galera_4nodes.cnf + +[mysqld] +wsrep-ignore-apply-errors=0 + +[mysqld.1] +wsrep_node_name='node_1' + +[mysqld.2] +wsrep_node_name='node_2' + +[mysqld.3] +wsrep_node_name='node_3' + +[mysqld.4] +wsrep_node_name='node_4' +wsrep_sst_donor='node_1' + +[ENV] +galera_cluster_size=4 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_during_ist.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_during_ist.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_during_ist.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_during_ist.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,165 @@ +# +#
Test a case where a joiner encounters an error during IST +# Instead of voting, it should assume an error and bail out. +# + +--source include/galera_cluster.inc +--source include/big_test.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +# Make sure that the test is operating on the right version of galera library. +--let $galera_version=26.4.19 source ../wsrep/include/check_galera_version.inc; + +--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 +--let $node_4=node_4 +--source ../include/auto_increment_offset_save.inc + +# create table t1 and procedure p1 to generate writesets +--connection node_1 +CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY); + +DELIMITER |; +CREATE PROCEDURE p1(IN max INT) +BEGIN + DECLARE i INT; + DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; + + SET i = 0; + WHILE i < max DO + INSERT IGNORE INTO t1 VALUES (DEFAULT); + SET i = i + 1; + END WHILE; +END| +DELIMITER ;| + +CALL p1(130); + +--connection node_4 +--echo Shutting down server 4... +--let $node_4_server_id= `SELECT @@server_id` +--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect +--let $node_4_pid_file= `SELECT @@pid_file` +--source include/shutdown_mysqld.inc + +# Wait for node #4 to leave cluster +--let $members = 3 +--connection node_1 +--source include/wsrep_wait_membership.inc +--connection node_2 +--source include/wsrep_wait_membership.inc +--connection node_3 +--source include/wsrep_wait_membership.inc +--echo Server 4 left the cluster + +# Create some writesets for IST +--connection node_1 +CALL p1(130); + +# Create a writeset that node 4 won't be able to apply by creating a table +# that won't be present in the replication stream +--connection node_1 +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; + +--connection node_2 +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; + +--connection node_3 +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; + +# This should cause an error during IST +INSERT INTO t2 VALUES (DEFAULT); + +# make sure nodes 1,2,3 progress far enough for commit cut update +CALL p1(130); + +--connection node_1 +# prepare to stop SST donor thread when it receives a request from starting node #4 +SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation"; + +--echo Restarting server 4 +# Need to use this form instead of start_mysqld.inc because the latter is blocking +--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name + +--echo Wait for server 1 to become a donor +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached"; +--echo Server 1 got SST request from server 4 +SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue"; +SET GLOBAL debug = ""; +SET DEBUG_SYNC='RESET'; + +# +# After this point node #4 shall proceed to IST and bail out +# + +--echo Waiting for server 4 to leave the cluster +--let $members = 3 +--source include/wsrep_wait_membership.inc +--connection node_2 +--source include/wsrep_wait_membership.inc +--connection node_3 +--source include/wsrep_wait_membership.inc + +--connection node_4 +--echo Server 4 left the cluster, killing it...
+ +# Kill the connected server +--exec echo "wait" > $node_4_expect_file_name +--let KILL_NODE_PIDFILE = $node_4_pid_file +--perl + my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; + my $mysqld_pid = `cat $pid_filename`; + chomp($mysqld_pid); + system("kill -9 $mysqld_pid"); + exit(0); +EOF +--echo Killed server 4... +--source include/wait_until_disconnected.inc +--echo Restarting server 4... +--source include/start_mysqld.inc +--source include/galera_wait_ready.inc + +# Confirm node #4 has rejoined +--connection node_1 +--let $members = 4 +--source include/wsrep_wait_membership.inc + +# Confirm that all is good and all nodes have identical data + +--connection node_1 +SELECT count(*) AS expect1_390 FROM t1; +SELECT count(*) AS expect1_1 FROM t2; + +--connection node_2 +SELECT count(*) AS expect2_390 FROM t1; +SELECT count(*) AS expect2_1 FROM t2; + +--connection node_3 +SELECT count(*) AS expect3_390 FROM t1; +SELECT count(*) AS expect3_1 FROM t2; + +--connection node_4 +SELECT count(*) AS expect4_390 FROM t1; +SELECT count(*) AS expect4_1 FROM t2; + +DROP TABLE t1; +DROP TABLE t2; +DROP PROCEDURE p1; + +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); +CALL mtr.add_suppression("Inconsistency detected: Failed on preordered"); +CALL mtr.add_suppression("Failed to apply write set"); +CALL mtr.add_suppression("Sending JOIN failed: -103"); +CALL mtr.add_suppression("Failed to JOIN the cluster after SST"); + +--source ../include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_apply.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_apply.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_apply.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_apply.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,21 @@ +!include ../galera_4nodes.cnf + +[mysqld] +wsrep-ignore-apply-errors=0 + +[mysqld.1] +wsrep_node_name='node_1' + +[mysqld.2] +wsrep_node_name='node_2' + +[mysqld.3] +wsrep_node_name='node_3' + +[mysqld.4] +wsrep_node_name='node_4' +wsrep_sst_donor='node_1' + +[ENV] +galera_cluster_size=4 +MTR_SST_JOINER_DELAY=20 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_apply.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_apply.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_apply.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_apply.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,73 @@ +# +# Test a case where a vote happens in JOINED state after SST on a writeset +# that should be applied. +# + +--source galera_vote_joined_begin.inc +# +# At this point state snapshot has been copied, node 1 is operational and +# we have about 10 seconds while everything we do will go into the replication +# queue on node 4 which it will have to apply on top of the snapshot. 
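A note on the pattern shared by these new vote tests: statements executed with wsrep_on=OFF stay local to the node that runs them, so creating the same table on every node except the intended victim and then replicating a single INSERT into it produces a writeset the victim cannot apply. A minimal standalone sketch of the trick (same table name t2 as in the tests; an illustration, not part of the patch):

    -- On every node EXCEPT the one that should fail to apply:
    SET SESSION wsrep_on = OFF;  -- statements below are not replicated
    CREATE TABLE t2 (pk INT AUTO_INCREMENT PRIMARY KEY);
    SET SESSION wsrep_on = ON;   -- replication back on

    -- On any node that now has t2: this INSERT replicates cluster-wide
    -- and cannot be applied where t2 was never created.
    INSERT INTO t2 VALUES (DEFAULT);

With wsrep-ignore-apply-errors=0, as set in the accompanying .cnf files, the apply failure is escalated to the cluster-wide vote these tests exercise.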
+# +# Increase replication queue on node_4 +--connection node_1 +CALL p1(130); + +# Create a writeset that node 4 won't be able to apply by creating a table +# that won't be present in the replication stream +--connection node_1 +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; + +--connection node_2 +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; + +--connection node_3 +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; + +# This should cause node #4 to initiate a vote and leave the cluster +INSERT INTO t2 VALUES (DEFAULT); + +# make sure nodes 1,2,3 progress far enough for commit cut update +CALL p1(130); + +--echo Waiting for server 4 to leave the cluster +--let $members = 3 +--source include/wsrep_wait_membership.inc +--connection node_2 +--source include/wsrep_wait_membership.inc +--connection node_1 +--source include/wsrep_wait_membership.inc + +--connection node_4 +--echo Server 4 left the cluster, killing it... +# Kill the connected server +--exec echo "wait" > $node_4_expect_file_name +--let KILL_NODE_PIDFILE = $node_4_pid_file +--perl + my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; + my $mysqld_pid = `cat $pid_filename`; + chomp($mysqld_pid); + system("kill -9 $mysqld_pid"); + exit(0); +EOF +--echo Killed server 4... +--source include/wait_until_disconnected.inc +--echo Restarting server 4... +--source include/start_mysqld.inc +--source include/galera_wait_ready.inc +DROP TABLE t2; + +--source galera_vote_joined_end.inc + +--connection node_4 +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); +CALL mtr.add_suppression("Inconsistency detected: Inconsistent by consensus"); +CALL mtr.add_suppression("Failed to apply write set: gtid:"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_begin.inc mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_begin.inc --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_begin.inc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_begin.inc 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,79 @@ +# This file's purpose is to set up node 4 to require an SST, which is artificially +# prolonged so that node 4 accumulates a sufficient replication queue. +# The contents of the queue are controlled by the sourcing test files. + +--source include/galera_cluster.inc +--source include/big_test.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +# Make sure that the test is operating on the right version of galera library. +--let $galera_version=26.4.19 +source ../wsrep/include/check_galera_version.inc; + +--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 +--let $node_4=node_4 +--source ../include/auto_increment_offset_save.inc + +# create table t1 and procedure p1 to generate writesets +--connection node_1 +CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY); + +DELIMITER |; +CREATE PROCEDURE p1(IN max INT) +BEGIN + DECLARE i INT; + DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END; + + SET i = 0; + WHILE i < max DO + INSERT IGNORE INTO t1 VALUES (DEFAULT); + SET i = i + 1; + END WHILE; +END| +DELIMITER ;| + +# 130 events move the commit cut; this is essential for voting +CALL p1(130); + +--connection node_4 +--echo Shutting down server 4...
+--let $node_4_server_id= `SELECT @@server_id` +--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect +--let $node_4_pid_file= `SELECT @@pid_file` +--source include/shutdown_mysqld.inc +# enforce SST +--exec rm -rf $MYSQLTEST_VARDIR/mysqld.4/data/grastate.dat + +# Wait for node #4 to leave cluster +--connection node_1 +--let $members = 3 +--source include/wsrep_wait_membership.inc + +# prepare to stop SST donor thread when node is in donor state +SET GLOBAL debug = "+d,sync.wsrep_donor_state"; + +--connection node_4 +--echo Restarting server 4... +# Need to use this form instead of start_mysqld.inc because the latter is blocking +--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name + +# Wait for node #1 to become a donor +--connection node_1 +SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_donor_state_reached"; +--echo Tables on server 1 flushed and locked for SST to server 4 +SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_donor_state"; +SET GLOBAL debug = ""; +SET DEBUG_SYNC='RESET'; + +--echo Wait for the state snapshot to be copied to server 4 +--source include/galera_wait_ready.inc +--echo SST script unlocked server 1 + +# +# At this point state snapshot has been copied, node 1 is operational and +# we have about 20 seconds while everything we do will go into the replication +# queue on node 4 which it will have to apply on top of the snapshot. +# diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_end.inc mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_end.inc --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_end.inc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_end.inc 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,33 @@ +# Confirm node #4 has rejoined +--connection node_1 +--let $members = 4 +--source include/wsrep_wait_membership.inc +#DROP TABLE IF EXISTS t2; + +# Confirm that all is good and all nodes have identical data + +--connection node_1 +SELECT count(*) AS expect1_390 FROM t1; + +#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows"); +#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno [0-9]+"); + +--connection node_2 +SELECT count(*) AS expect2_390 FROM t1; + +#CALL mtr.add_suppression("mysqld: Can't find record in 't1'"); +#CALL mtr.add_suppression("Replica SQL: Could not execute Delete_rows"); +#CALL mtr.add_suppression("Event 3 Delete_rows apply failed: 120, seqno [0-9]+"); + +--connection node_3 +SELECT count(*) AS expect3_390 FROM t1; + +--connection node_4 +SELECT count(*) AS expect4_390 FROM t1; + +DROP TABLE t1; +DROP PROCEDURE p1; + +#CALL mtr.add_suppression("inconsistent with group"); + +--source ../include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_skip.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_skip.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_skip.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_skip.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,21 @@ +!include ../galera_4nodes.cnf + +[mysqld] +wsrep-ignore-apply-errors=0 + +[mysqld.1] +wsrep_node_name='node_1' + +[mysqld.2] +wsrep_node_name='node_2' + +[mysqld.3] +wsrep_node_name='node_3' + +[mysqld.4] +wsrep_node_name='node_4' +wsrep_sst_donor='node_1' + +[ENV] +galera_cluster_size=4 +MTR_SST_JOINER_DELAY=20 diff -Nru
mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_skip.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_skip.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_joined_skip.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_joined_skip.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,100 @@ +# +# Test a case where a vote happens in JOINED state after SST on a writeset +# that should be skipped. I.e. the JOINED node should continue operation. +# + +--source galera_vote_joined_begin.inc +# +# At this point state snapshot has been copied, node 1 is operational and +# we have about 10 seconds while everything we do will go into the replication +# queue on node 4 which it will have to apply on top of the snapshot. +# + +# Increase replication queue on node_4 +--connection node_1 +CALL p1(130); + +# +# Create a writeset that node 4 won't be able to apply by making node 3 +# inconsistent +# +--connection node_3 +--let $node_3_server_id= `SELECT @@server_id` +--let $node_3_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_3_server_id.expect +--let $node_3_pid_file= `SELECT @@pid_file` +SET SESSION wsrep_on = OFF; +CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY); +SET SESSION wsrep_on = ON; + +# This should cause nodes #1 and #2 to initiate a vote and kick node #3 +# out of the cluster; node #4 should recover the vote when it fails to apply +# the event and continue +INSERT INTO t2 VALUES (DEFAULT); +SET SESSION wsrep_on = OFF; + +# make sure nodes 1,2 progress far enough for commit cut update +--connection node_1 +CALL p1(130); + +--let $members = 3 +--echo Waiting for server 3 to leave the cluster +--connection node_1 +--source include/wsrep_wait_membership.inc +--connection node_2 +--source include/wsrep_wait_membership.inc +--connection node_4 +# need to wait for extra SST delay on joiner +--sleep $MTR_SST_JOINER_DELAY +--sleep $MTR_SST_JOINER_DELAY +--enable_reconnect +--let $wait_timeout = 60 +--source include/wsrep_wait_membership.inc + +--connection node_3 +--echo Server 3 left the cluster, killing it... +# Kill the connected server +--exec echo "wait" > $node_3_expect_file_name +--let KILL_NODE_PIDFILE = $node_3_pid_file +--perl + my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; + my $mysqld_pid = `cat $pid_filename`; + chomp($mysqld_pid); + system("kill -9 $mysqld_pid"); + exit(0); +EOF +--echo Killed server 3. +--source include/wait_until_disconnected.inc +--echo Restarting server 3... +--exec echo "restart:$start_mysqld_params" > $node_3_expect_file_name + +--echo Waiting for server 3 to rejoin the cluster +--connection node_1 +--let $members = 3 +--source include/wsrep_wait_membership.inc + +--connection node_3 +--echo sleeping for $MTR_SST_JOINER_DELAY +# need to wait for extra SST delay on joiner +--sleep $MTR_SST_JOINER_DELAY +--sleep $MTR_SST_JOINER_DELAY +--echo Waiting ready +--enable_reconnect +--source include/galera_wait_ready.inc +--echo Server 3 restarted.
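A note on the timing knob used by the joined-vote tests: MTR_SST_JOINER_DELAY is set in the [ENV] section of the matching .cnf files, and mysql-test-run exports [ENV] entries into the environment of the servers and their SST scripts; judging by the comment in galera_vote_joined_begin.inc, it is what artificially prolongs the joiner side of the state transfer so a replication queue can build up behind the snapshot. The doubled --sleep then waits out that delay with margin before polling membership. The two pieces side by side (values are the suite's choices, shown for illustration):

    # galera_vote_joined_skip.cnf
    [ENV]
    MTR_SST_JOINER_DELAY=20

    # galera_vote_joined_skip.test: ride out the artificial joiner delay
    --sleep $MTR_SST_JOINER_DELAY
    --sleep $MTR_SST_JOINER_DELAY
    --source include/wsrep_wait_membership.inc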
+ +--source galera_vote_joined_end.inc + +--connection node_1 +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); + +--connection node_2 +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); + +--connection node_3 +CALL mtr.add_suppression("Vote 0 \\(success\\) on .+ is inconsistent with group"); + +--connection node_4 +CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table "); +CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_vote_rejoin_ddl.test 2025-05-19 16:14:24.000000000 +0000 @@ -91,10 +91,6 @@ DROP TABLE t2; ---let $node_3=node_3 ---let $auto_increment_offset_node_3 = 3; ---let $node_4=node_4 ---let $auto_increment_offset_node_4 = 4; --source suite/galera/include/auto_increment_offset_restore.inc --disconnect node_3 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,14 +4,13 @@ loose-galera-wan=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=10M;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=10M;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.3] -wsrep_provider_options='base_port=@mysqld.3.#galera_port;gcache.size=10M;gmcast.segment=2' +wsrep_provider_options='gmcast.segment=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.4] -wsrep_provider_options='base_port=@mysqld.4.#galera_port;gcache.size=10M;gmcast.segment=3' - +wsrep_provider_options='gmcast.segment=3;repl.causal_read_timeout=PT90S;base_port=@mysqld.4.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan.test 2025-05-19 16:14:24.000000000 +0000 @@ -10,10 +10,10 @@ --source include/have_innodb.inc --source include/force_restart.inc -CALL mtr.add_suppression("WSREP: Stray state UUID msg:"); -CALL mtr.add_suppression("Sending JOIN failed: "); -CALL 
mtr.add_suppression("WSREP: .* sending install message failed: Socket is not connected"); -CALL mtr.add_suppression("There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside"); +CALL mtr.add_suppression("WSREP: Stray state UUID msg: "); +CALL mtr.add_suppression("WSREP: .*Sending JOIN failed: "); +CALL mtr.add_suppression("WSREP: .*sending install message failed: (Transport endpoint|Socket) is not connected"); +CALL mtr.add_suppression("WSREP: .*There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside"); --let $wait_condition = SELECT VARIABLE_VALUE = 4 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; --source include/wait_condition.inc @@ -42,8 +42,8 @@ DROP TABLE t1; --connection node_1 -call mtr.add_suppression("WSREP: read_completion_condition.*"); -call mtr.add_suppression("WSREP: read_handler.*"); +call mtr.add_suppression("WSREP: read_completion_condition"); +call mtr.add_suppression("WSREP: read_handler"); --disconnect node_3 --disconnect node_4 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan_restart_ist.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan_restart_ist.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan_restart_ist.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan_restart_ist.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,14 +4,13 @@ loose-galera-wan-restart-ist=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.3] -wsrep_provider_options='base_port=@mysqld.3.#galera_port;gmcast.segment=2' +wsrep_provider_options='gmcast.segment=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.4] -wsrep_provider_options='base_port=@mysqld.4.#galera_port;gmcast.segment=2' - +wsrep_provider_options='gmcast.segment=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.4.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan_restart_sst.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan_restart_sst.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wan_restart_sst.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wan_restart_sst.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,14 +4,13 @@ loose-galera-wan-restart-sst=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] 
-wsrep_provider_options='base_port=@mysqld.2.#galera_port;gmcast.segment=1' +wsrep_provider_options='gmcast.segment=1;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.3] -wsrep_provider_options='base_port=@mysqld.3.#galera_port;gmcast.segment=2' +wsrep_provider_options='gmcast.segment=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.4] -wsrep_provider_options='base_port=@mysqld.4.#galera_port;gmcast.segment=2' - +wsrep_provider_options='gmcast.segment=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.4.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_log_conficts.cnf mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_log_conficts.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_log_conficts.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_log_conficts.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,8 +5,3 @@ [mysqld.2] wsrep_log_conflicts=ON - - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_mode.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_mode.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_mode.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_mode.test 2025-05-19 16:14:24.000000000 +0000 @@ -16,7 +16,6 @@ DROP TABLE t1; SET GLOBAL wsrep_mode = default; - # MDEV-25698 SIGSEGV in wsrep_should_replicate_ddl SET GLOBAL wsrep_mode = STRICT_REPLICATION; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_provider_options_syntax.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_provider_options_syntax.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_provider_options_syntax.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_provider_options_syntax.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,10 +3,11 @@ # --source include/galera_cluster.inc --source include/have_innodb.inc + --let LOGF=$MYSQLTEST_VARDIR/log/mysqld.1.err --disable_info -call mtr.add_suppression("WSREP\: Unknown parameter 'gmcasts\\.segment'"); -call mtr.add_suppression("WSREP\: Set options returned 7"); +call mtr.add_suppression("WSREP: Unknown parameter 'gmcasts\\.segment'"); +call mtr.add_suppression("WSREP: Set options returned 7"); --error ER_WRONG_ARGUMENTS SET GLOBAL wsrep_provider_options="gmcasts.segment=1"; # Search for unhandled exception message. 
diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test --- mariadb-10.11.11/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/galera_wsrep_schema_detached.test 2025-05-19 16:14:24.000000000 +0000 @@ -6,11 +6,22 @@ --source include/auto_increment_offset_save.inc --connection node_1 -call mtr.add_suppression("WSREP:.*"); + +call mtr.add_suppression("WSREP: async IST sender failed to serve"); +call mtr.add_suppression("WSREP: Failed to establish connection: Connection refused"); +call mtr.add_suppression("WSREP: IST failed: IST sender, failed to connect"); +call mtr.add_suppression("WSREP: .*State transfer.* failed: Protocol error"); + SET @wsrep_provider_options_orig = @@GLOBAL.wsrep_provider_options; SET GLOBAL wsrep_provider_options ='pc.ignore_sb=true;pc.weight=2'; --connection node_2 + +call mtr.add_suppression("WSREP: async IST sender failed to serve"); +call mtr.add_suppression("WSREP: Failed to establish connection: Connection refused"); +call mtr.add_suppression("WSREP: IST failed: IST sender, failed to connect"); +call mtr.add_suppression("WSREP: .*State transfer.* failed: Protocol error"); + SET @wsrep_cluster_address_orig = @@GLOBAL.wsrep_cluster_address; SET GLOBAL WSREP_ON=0; SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; @@ -22,6 +33,7 @@ SET GLOBAL wsrep_cluster_address = @wsrep_cluster_address_orig; SELECT 1; DELETE FROM mysql.wsrep_allowlist; + --connection node_2 --source include/kill_galera.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mdev-29775.test mariadb-10.11.13/mysql-test/suite/galera/t/mdev-29775.test --- mariadb-10.11.11/mysql-test/suite/galera/t/mdev-29775.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mdev-29775.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,81 @@ +--source include/galera_cluster.inc +--source include/have_aria.inc + +# +# MDEV-29775 : Assertion `0' failed in void Protocol::end_statement() when adding data to the MyISAM table after setting wsrep_mode=replicate_myisam +# +SET GLOBAL wsrep_mode=REPLICATE_MYISAM; +CREATE TABLE t (f0 CHAR(0)) ENGINE=MyISAM; +INSERT INTO t VALUES(); +SELECT * FROM t; +--connection node_2 +SELECT * FROM t; +DROP TABLE t; + +--connection node_1 +SET GLOBAL wsrep_mode=REPLICATE_MYISAM; +SET GLOBAL wsrep_forced_binlog_format=ROW; +CREATE TABLE t (f0 CHAR(0)) ENGINE=MyISAM; +INSERT INTO t VALUES(); +SELECT * FROM t; +--connection node_2 +SELECT * FROM t; +DROP TABLE t; + +--connection node_1 +SET GLOBAL wsrep_mode=REPLICATE_ARIA; +CREATE TABLE t (f0 CHAR(0)) ENGINE=Aria; +INSERT INTO t VALUES(); +SELECT * FROM t; +--connection node_2 +SELECT * FROM t; +DROP TABLE t; + +--connection node_1 +SET GLOBAL wsrep_mode=REPLICATE_ARIA; +SET GLOBAL wsrep_forced_binlog_format=ROW; +CREATE TABLE t (f0 CHAR(0)) ENGINE=Aria; +INSERT INTO t VALUES(); +SELECT * FROM t; +--connection node_2 +SELECT * FROM t; +DROP TABLE t; + +--connection node_1 +SET GLOBAL wsrep_mode=REPLICATE_MYISAM; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_forced_binlog_format=MIXED; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; + +SET GLOBAL wsrep_mode=REPLICATE_ARIA; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_forced_binlog_format=MIXED; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; + +SET GLOBAL wsrep_mode=DEFAULT; +SET GLOBAL 
wsrep_forced_binlog_format=MIXED; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_mode = REPLICATE_ARIA; + +SET GLOBAL wsrep_mode=DEFAULT; +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_mode = REPLICATE_ARIA; + +SET GLOBAL wsrep_forced_binlog_format=DEFAULT; +SET GLOBAL wsrep_mode=DEFAULT; +SET GLOBAL wsrep_forced_binlog_format=MIXED; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; +SET GLOBAL wsrep_forced_binlog_format=STATEMENT; +--error ER_WRONG_ARGUMENTS +SET GLOBAL wsrep_mode = REPLICATE_MYISAM; + +SET GLOBAL wsrep_forced_binlog_format=DEFAULT; +SET GLOBAL wsrep_mode=DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mdev-30653.test mariadb-10.11.13/mysql-test/suite/galera/t/mdev-30653.test --- mariadb-10.11.11/mysql-test/suite/galera/t/mdev-30653.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mdev-30653.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_aria.inc create table t1 (id serial, val int) engine=innodb; @@ -6,7 +7,8 @@ insert into t1 values(1, 23); insert into t2 values(2, 42); -call mtr.add_suppression("WSREP: Replication of non-transactional engines is experimental. Storage engine Aria for table 'test'.'t2' is not supported in Galera"); + +call mtr.add_suppression("WSREP: Replication of non-transactional engines is experimental\\. Storage engine Aria for table 'test'\\.'t2' is not supported in Galera"); begin; update t1 set val=24 where id=1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mdev-31285.test mariadb-10.11.13/mysql-test/suite/galera/t/mdev-31285.test --- mariadb-10.11.11/mysql-test/suite/galera/t/mdev-31285.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mdev-31285.test 2025-05-19 16:14:24.000000000 +0000 @@ -11,5 +11,3 @@ --connection node_2 --error ER_NO_SUCH_TABLE SHOW CREATE TABLE t; - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#198.cnf mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#198.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#198.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#198.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,9 +1,4 @@ !include ../galera_2nodes.cnf -[mysqld.1] +[mysqld] log-bin -wsrep-debug=1 - -[mysqld.1] -log-bin -wsrep-debug=1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#198.test mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#198.test --- mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#198.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#198.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,5 @@ --source include/galera_cluster.inc --source include/have_innodb.inc ---source include/force_restart.inc CREATE TABLE t1 (id INT PRIMARY KEY) ENGINE=InnoDB; CREATE TABLE t2 (id INT PRIMARY KEY) ENGINE=InnoDB; @@ -21,8 +20,9 @@ --connection node_2 SET SESSION wsrep_sync_wait = 0; ---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE = 'Waiting for table metadata lock' ---source include/wait_condition.inc +--let $wait_condition = SELECT COUNT(*) BETWEEN 1 AND 2 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 
'Waiting for table metadata lock%' OR STATE LIKE 'Waiting to execute in isolation%'; +--let $wait_condition_on_error_output = SELECT * FROM INFORMATION_SCHEMA.PROCESSLIST +--source include/wait_condition_with_debug_and_kill.inc --connection node_1 INSERT INTO t2 VALUES (1); @@ -38,3 +38,8 @@ DROP TABLE t1; DROP TABLE t2; + +--connection node_1 + +--disconnect node_2a +--disconnect node_2b diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#201.cnf mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#201.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#201.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#201.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,5 +5,3 @@ [mysqld.2] query_cache_type=1 - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#247.test mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#247.test --- mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#247.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#247.test 2025-05-19 16:14:24.000000000 +0000 @@ -20,4 +20,3 @@ --sleep 1 DROP TABLE t1; SHOW VARIABLES LIKE 'wsrep_desync'; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#31.test mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#31.test --- mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#31.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#31.test 2025-05-19 16:14:24.000000000 +0000 @@ -49,5 +49,3 @@ --source include/auto_increment_offset_restore.inc --source include/galera_end.inc - - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#33.cnf mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#33.cnf --- mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#33.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#33.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,8 +1,7 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;pc.ignore_sb=true' - +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#332.test mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#332.test --- mariadb-10.11.11/mysql-test/suite/galera/t/mysql-wsrep#332.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/mysql-wsrep#332.test 2025-05-19 16:14:24.000000000 +0000 @@ -216,4 +216,3 @@ DROP TABLE c; DROP TABLE p1; DROP TABLE p2; - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/rename.test mariadb-10.11.13/mysql-test/suite/galera/t/rename.test --- mariadb-10.11.11/mysql-test/suite/galera/t/rename.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/rename.test 2025-05-19 16:14:24.000000000 +0000 @@ -50,4 +50,3 @@ DROP TABLE t2; --echo # End of tests - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/view.test 
mariadb-10.11.13/mysql-test/suite/galera/t/view.test --- mariadb-10.11.11/mysql-test/suite/galera/t/view.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/view.test 2025-05-19 16:14:24.000000000 +0000 @@ -47,4 +47,3 @@ DROP TABLE t1; --echo # End of tests - diff -Nru mariadb-10.11.11/mysql-test/suite/galera/t/wsrep_mode_strict_replication.test mariadb-10.11.13/mysql-test/suite/galera/t/wsrep_mode_strict_replication.test --- mariadb-10.11.11/mysql-test/suite/galera/t/wsrep_mode_strict_replication.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera/t/wsrep_mode_strict_replication.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_aria.inc call mtr.add_suppression("WSREP: ALTER TABLE isolation failure"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/disabled.def mariadb-10.11.13/mysql-test/suite/galera_3nodes/disabled.def --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/disabled.def 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/disabled.def 2025-05-19 16:14:24.000000000 +0000 @@ -9,6 +9,3 @@ # Do not use any TAB characters for whitespace. # ############################################################################## - -galera_2_cluster : MDEV-32631 galera_2_cluster: before_rollback(): Assertion `0' failed -galera_nbo_master_phase_two_crash : MENT-2215 Test failure on galera_3nodes.galera_nbo_master_non_prim_failure diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/galera_2x3nodes.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/galera_2x3nodes.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/galera_2x3nodes.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/galera_2x3nodes.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -24,6 +24,7 @@ #sst_port=@OPT.port wsrep_cluster_address=gcomm:// wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' + wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/galera_3nodes.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/galera_3nodes.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/galera_3nodes.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/galera_3nodes.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -19,10 +19,11 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address=gcomm:// -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.1.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.1.port wsrep_sst_receive_address='127.0.0.1:@mysqld.1.#sst_port' +wsrep_node_name=node1 [mysqld.2] wsrep-on=1 @@ -30,10 +31,11 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' 
-wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.2.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.2.port wsrep_sst_receive_address='127.0.0.1:@mysqld.2.#sst_port' +wsrep_node_name=node2 [mysqld.3] wsrep-on=1 @@ -41,10 +43,11 @@ #ist_port=@OPT.port #sst_port=@OPT.port wsrep_cluster_address='gcomm://127.0.0.1:@mysqld.1.#galera_port' -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;gcache.size=10M' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_node_address='127.0.0.1:@mysqld.3.#galera_port' wsrep_node_incoming_address=127.0.0.1:@mysqld.3.port wsrep_sst_receive_address='127.0.0.1:@mysqld.3.#sst_port' +wsrep_node_name=node3 [sst] sst-log-archive-dir=@ENV.MYSQLTEST_VARDIR/log diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/MDEV-36360.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/MDEV-36360.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/MDEV-36360.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/MDEV-36360.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,61 @@ +connection node_2; +connection node_1; +connection node_1; +connection node_2; +connection node_3; +connection node_1; +CREATE TABLE parent ( +id INT PRIMARY KEY +) ENGINE=InnoDB; +CREATE TABLE child ( +id INT PRIMARY KEY, +parent_id INT, +KEY (parent_id), +CONSTRAINT FOREIGN KEY (parent_id) REFERENCES parent(id) +) ENGINE=InnoDB; +INSERT INTO parent VALUES (1), (2); +connection node_3; +SET SESSION wsrep_on = OFF; +DELETE FROM parent WHERE id = 1; +SET SESSION wsrep_on = ON; +Restarting server 3 with one applier thread having FK and UK checks disabled +SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_after_write_row'; +connection node_1; +INSERT INTO child VALUES (1, 1); +connection node_3; +SET DEBUG_SYNC = 'now WAIT_FOR sync.wsrep_after_write_row_reached'; +SET GLOBAL DEBUG_DBUG = ''; +SET wsrep_sync_wait = 0; +SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL signal.wsrep_after_write_row'; +INSERT INTO child VALUES (2, 2); +SET DEBUG_SYNC = 'RESET'; +include/assert_grep.inc [no FK constraint failure] +Server 3 +SELECT COUNT(*) AS EXPECT_1 FROM parent; +EXPECT_1 +1 +SELECT COUNT(*) AS EXPECT_2 FROM child; +EXPECT_2 +2 +connection node_1; +Server 1 +SET wsrep_sync_wait = 15; +SELECT COUNT(*) AS EXPECT_2 FROM parent; +EXPECT_2 +2 +SELECT COUNT(*) AS EXPECT_2 FROM child; +EXPECT_2 +2 +connection node_2; +Server 2 +SET wsrep_sync_wait = 15; +SELECT COUNT(*) AS EXPECT_2 FROM parent; +EXPECT_2 +2 +SELECT COUNT(*) AS EXPECT_2 FROM child; +EXPECT_2 +2 +DROP TABLE child; +DROP TABLE parent; +disconnect node_2; +disconnect node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera-features#115.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera-features#115.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera-features#115.result 1970-01-01 
00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera-features#115.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,41 @@ +connection node_2; +connection node_1; +connection node_1; +connection node_2; +connection node_3; +connection node_2; +SET GLOBAL wsrep_on=OFF; +DROP SCHEMA test; +connection node_3; +SET GLOBAL wsrep_on=OFF; +CREATE TABLE t1 (f1 INTEGER); +connection node_1; +CREATE TABLE t1 (f1 INTEGER); +connection node_1; +SET SESSION wsrep_sync_wait=0; +connection node_2; +SET SESSION wsrep_sync_wait=0; +connection node_3; +SET SESSION wsrep_sync_wait=0; +connection node_1; +SET GLOBAL wsrep_provider_options='pc.bootstrap=YES'; +connection node_2; +disconnect node_2; +connect node_2, 127.0.0.1, root, , mysql, $NODE_MYPORT_2; +# restart +connection node_3; +# restart +connection node_1; +DROP TABLE test.t1; +connection node_2; +CALL mtr.add_suppression("Inconsistent by consensus\\."); +CALL mtr.add_suppression("Error_code: 1049"); +CALL mtr.add_suppression("WSREP: Failed to apply trx: source: "); +CALL mtr.add_suppression("WSREP: Failed to apply app buffer"); +CALL mtr.add_suppression("WSREP: Node consistency compromized, leaving cluster\\.\\.\\."); +connection node_3; +CALL mtr.add_suppression("Inconsistent by consensus\\."); +CALL mtr.add_suppression("Error_code: 1050"); +CALL mtr.add_suppression("WSREP: Failed to apply trx: source: "); +CALL mtr.add_suppression("WSREP: Failed to apply app buffer"); +CALL mtr.add_suppression("WSREP: Node consistency compromized, leaving cluster\\.\\.\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_2_cluster.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_2_cluster.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_2_cluster.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_2_cluster.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,9 +1,9 @@ connection node_2; connection node_1; +connect node_6, 127.0.0.1, root, , test, $NODE_MYPORT_6; connect node_5, 127.0.0.1, root, , test, $NODE_MYPORT_5; connect node_4, 127.0.0.1, root, , test, $NODE_MYPORT_4; connection node_4; -CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_port=NODE_MYPORT_1, master_use_gtid=current_pos;; START SLAVE; include/wait_for_slave_to_start.inc connection node_1; @@ -21,7 +21,6 @@ SELECT COUNT(*) = 1 FROM t1; COUNT(*) = 1 1 -connect node_6, 127.0.0.1, root, , test, $NODE_MYPORT_6; connection node_6; SELECT COUNT(*) = 1 FROM t1; COUNT(*) = 1 @@ -51,18 +50,30 @@ COUNT(*) = 3 1 connection node_2; +connection node_1; +connection node_3; +connection node_4; +connection node_5; +connection node_6; +connection node_2; OPTIMIZE TABLE t1; Table Op Msg_type Msg_text test.t1 optimize note Table does not support optimize, doing recreate + analyze instead test.t1 optimize status OK +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. 
Statement is unsafe because it uses a system variable that may have a different value on the slave connection node_1; +connection node_3; connection node_4; +connection node_5; connection node_6; connection node_1; DROP TABLE t1; connection node_4; STOP SLAVE; RESET SLAVE; +Warnings: +Note 4190 RESET SLAVE is implicitly changing the value of 'Using_Gtid' from 'Current_Pos' to 'Slave_Pos' SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; @@ -75,19 +86,33 @@ SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; -CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); connection node_3; SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; -CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); connection node_5; SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; -CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); connection node_6; SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; +connection node_1; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); +connection node_2; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); +connection node_3; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); +connection node_4; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); +connection node_5; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); +connection node_6; CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_garbd.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_garbd.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_garbd.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_garbd.result 2025-05-19 16:14:24.000000000 +0000 @@ -2,8 +2,6 @@ connection node_1; connection node_1; connection node_2; -connection node_1; -connection node_2; connection node_3; Killing node #3 to free ports for garbd ... connection node_3; @@ -26,8 +24,8 @@ Restarting node #3 to satisfy MTR's end-of-test checks connection node_3; connection node_1; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); connection node_2; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); connection node_3; -CALL mtr.add_suppression("WSREP: Protocol violation\\. 
JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,6 @@ connection node_2; connection node_1; connection node_1; -connection node_1; connection node_2; connection node_3; connection node_1; @@ -12,7 +11,6 @@ CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; -connection node_2; Killing node #3 to free ports for garbd ... connection node_3; connection node_1; @@ -34,8 +32,8 @@ connection node_3; connection node_1; connection node_1; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); connection node_2; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); connection node_3; -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. 
Message ignored\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_gtid_2_cluster.result 2025-05-19 16:14:24.000000000 +0000 @@ -35,7 +35,7 @@ Variable_name Value wsrep_cluster_size 3 connection node_1; -change master to master_host='127.0.0.1', master_user='root', master_port=NODE_MYPORT_4, master_use_gtid=current_pos, ignore_server_ids=(12,13);; +--- ignore_server_ids=(12,13) start slave; include/wait_for_slave_to_start.inc select @@gtid_binlog_state; @@ -45,7 +45,7 @@ @@gtid_slave_pos connection node_4; -change master to master_host='127.0.0.1', master_user='root', master_port=NODE_MYPORT_1, master_use_gtid=current_pos, ignore_server_ids=(22,23);; +--- ignore_server_ids=(22,23) start slave; include/wait_for_slave_to_start.inc select @@gtid_binlog_state; @@ -262,7 +262,7 @@ reset master; set global wsrep_on=ON; connection node_1; -change master to master_host='127.0.0.1', master_user='root', master_port=NODE_MYPORT_6, master_use_gtid=current_pos, ignore_server_ids=(12,13);; +--- ignore_server_ids=(12,13) start slave; include/wait_for_slave_to_start.inc select @@gtid_binlog_state; @@ -272,7 +272,7 @@ @@gtid_slave_pos connection node_4; -change master to master_host='127.0.0.1', master_user='root', master_port=NODE_MYPORT_3, master_use_gtid=current_pos, ignore_server_ids=(22,23);; +--- ignore_server_ids=(22,23) start slave; include/wait_for_slave_to_start.inc select @@gtid_binlog_state; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_A.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_A.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_A.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_A.result 2025-05-19 16:14:24.000000000 +0000 @@ -77,8 +77,8 @@ SET GLOBAL wsrep_provider_options = 'dbug='; connection node_1; DROP TABLE t1; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); connection node_2; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); connection node_3; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_B.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_B.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_B.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_B.result 2025-05-19 16:14:24.000000000 +0000 @@ -87,11 +87,11 @@ SET GLOBAL wsrep_provider_options = 'dbug='; connection node_1; DROP TABLE t1; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer 
required\\."); connection node_2; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); connection node_3; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); disconnect node_1a; disconnect node_3; disconnect node_2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_C.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_C.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_C.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_join_with_cc_C.result 2025-05-19 16:14:24.000000000 +0000 @@ -94,9 +94,9 @@ SET GLOBAL wsrep_provider_options = 'signal=after_shift_to_joining'; connection node_1; DROP TABLE t1; -call mtr.add_suppression("WSREP: Send action {(.*), STATE_REQUEST} returned -107 \\(Transport endpoint is not connected\\)"); -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Send action {.* STATE_REQUEST} returned -107 \\((Transport endpoint|Socket) is not connected\\)"); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); connection node_2; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); connection node_3; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_parallel_apply_3nodes.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_parallel_apply_3nodes.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_parallel_apply_3nodes.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_parallel_apply_3nodes.result 2025-05-19 16:14:24.000000000 +0000 @@ -26,7 +26,7 @@ SELECT f1 = 111 FROM t1; f1 = 111 1 -SELECT COUNT(*) IN (1, 2) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%committed%'; +SELECT COUNT(*) IN (1, 2) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE '%committed%' OR STATE LIKE 'Waiting for certification'); COUNT(*) IN (1, 2) 1 SET GLOBAL wsrep_slave_threads = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_pc_weight.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_pc_weight.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_pc_weight.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_pc_weight.result 2025-05-19 16:14:24.000000000 +0000 @@ -157,10 +157,10 @@ CALL mtr.add_suppression('WSREP: gcs_caused\\(\\) returned -1'); connection node_2; CALL mtr.add_suppression('SYNC message from member'); -CALL mtr.add_suppression('user message in state LEAVING'); -CALL mtr.add_suppression('sending install message failed: (Transport endpoint is not connected|Socket is not connected)'); -CALL mtr.add_suppression('overriding 
reported weight for'); +CALL mtr.add_suppression('WSREP: user message in state LEAVING'); +CALL mtr.add_suppression('sending install message failed: (Transport endpoint|Socket) is not connected'); +CALL mtr.add_suppression('overriding reported weight for '); connection node_3; CALL mtr.add_suppression('WSREP: user message in state LEAVING'); -CALL mtr.add_suppression('sending install message failed: (Transport endpoint is not connected|Socket is not connected)'); -CALL mtr.add_suppression('overriding reported weight for'); +CALL mtr.add_suppression('sending install message failed: (Transport endpoint|Socket) is not connected'); +CALL mtr.add_suppression('overriding reported weight for '); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_safe_to_bootstrap.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_safe_to_bootstrap.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_safe_to_bootstrap.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_safe_to_bootstrap.result 2025-05-19 16:14:24.000000000 +0000 @@ -47,7 +47,7 @@ CALL mtr.add_suppression("Plugin 'wsrep' registration as a STORAGE ENGINE failed\\."); CALL mtr.add_suppression("Plugin 'wsrep' registration as a FUNCTION failed\\."); CALL mtr.add_suppression("Failed to initialize plugins\\."); -CALL mtr.add_suppression("WSREP: gcs/src/gcs_core.cpp:core_handle_uuid_msg\\(\\)"); +CALL mtr.add_suppression("WSREP: gcs/src/gcs_core\\.cpp:core_handle_uuid_msg\\(\\)"); connection node_3; CALL mtr.add_suppression("WSREP: no nodes coming from prim view, prim not possible"); CALL mtr.add_suppression("WSREP: It may not be safe to bootstrap the cluster from this node"); @@ -61,7 +61,7 @@ CALL mtr.add_suppression("Plugin 'wsrep' registration as a STORAGE ENGINE failed\\."); CALL mtr.add_suppression("Plugin 'wsrep' registration as a FUNCTION failed\\."); CALL mtr.add_suppression("Failed to initialize plugins\\."); -CALL mtr.add_suppression("WSREP: gcs/src/gcs_core.cpp:core_handle_uuid_msg\\(\\)"); +CALL mtr.add_suppression("WSREP: gcs/src/gcs_core\\.cpp:core_handle_uuid_msg\\(\\)"); SHOW CREATE TABLE t1; Table Create Table t1 CREATE TABLE `t1` ( diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_sst_donor_non_prim.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_sst_donor_non_prim.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_sst_donor_non_prim.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_sst_donor_non_prim.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,26 @@ +connection node_2; +connection node_1; +connection node_1; +connection node_2; +connection node_3; +connection node_2; +connection node_1; +SET GLOBAL debug_dbug = '+d,sync.wsrep_sst_donor_after_donation'; +connection node_2; +# restart +connection node_1; +SET DEBUG_SYNC = 'now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached'; +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=1'; +SET SESSION wsrep_sync_wait=0; +SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_sst_donor_after_donation_continue'; +SET DEBUG_SYNC = 'RESET'; +SET GLOBAL debug_dbug = ''; +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=0'; +SET SESSION wsrep_sync_wait=15; +connection node_1; +connection node_2; +connection node_3; +connection node_1; +connection node_1; +CALL mtr.add_suppression("WSREP: sst sent called when not SST donor, state CONNECTED"); +CALL mtr.add_suppression("WSREP: .* returned an error: 
Not connected to Primary Component"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_vote_rejoin_mysqldump.result mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_vote_rejoin_mysqldump.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/r/galera_vote_rejoin_mysqldump.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/r/galera_vote_rejoin_mysqldump.result 2025-05-19 16:14:24.000000000 +0000 @@ -57,7 +57,6 @@ t1 CREATE TABLE `t1` ( `f1` int(11) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci -CALL mtr.add_suppression("is inconsistent with group"); connection node_3; SHOW CREATE TABLE t1; Table Create Table @@ -80,4 +79,5 @@ CALL mtr.add_suppression("Table 'mysql\\.gtid_slave_pos' doesn't exist"); connection node_2; # restart +CALL mtr.add_suppression("WSREP: .+ is inconsistent with group"); connection node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/suite.pm mariadb-10.11.13/mysql-test/suite/galera_3nodes/suite.pm --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/suite.pm 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/suite.pm 2025-05-19 16:14:24.000000000 +0000 @@ -9,69 +9,71 @@ push @::global_suppressions, ( - qr(WSREP: wsrep_sst_receive_address is set to '127.0.0.1), - qr(WSREP: Could not open saved state file for reading: .*), - qr(WSREP: Could not open state file for reading: .*), - qr(WSREP: Gap in state sequence. Need state transfer.), + qr(WSREP: wsrep_sst_receive_address is set to '127\.0\.0\.1), + qr(WSREP: Could not open saved state file for reading: ), + qr(WSREP: Could not open state file for reading: ), + qr(WSREP: Gap in state sequence\. Need state transfer\.), qr(WSREP: Failed to prepare for incremental state transfer:), - qr(WSREP:.*down context.*), + qr(WSREP: .*down context.*), qr(WSREP: Failed to send state UUID:), - qr(WSREP: last inactive check more than .* skipping check), - qr(WSREP: Releasing seqno [0-9]* before [0-9]* was assigned.), - qr|WSREP: access file\(.*gvwstate.dat\) failed\(No such file or directory\)|, + qr(WSREP: last inactive check more than .+ skipping check), + qr(WSREP: Releasing seqno [0-9]+ before [0-9]+ was assigned\.), + qr|WSREP: access file\(.*gvwstate.dat\) failed ?\(No such file or directory\)|, qr(WSREP: Quorum: No node with complete state), qr(WSREP: Initial position was provided by configuration or SST, avoiding override), - qr|WSREP: discarding established \(time wait\) .*|, - qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside. Will use that one.), + qr|WSREP: discarding established \(time wait\) |, + qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside\. 
Will use that one\.), qr(WSREP: evs::proto.*), - qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:.*|, + qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:|, qr(WSREP: no nodes coming from prim view, prim not possible), - qr(WSREP: Member .* requested state transfer from .* but it is impossible to select State Transfer donor: Resource temporarily unavailable), + qr(WSREP: Member .+ ?requested state transfer from .+ but it is impossible to select State Transfer donor: Resource temporarily unavailable), qr(WSREP: user message in state LEAVING), - qr(WSREP: .* sending install message failed: Transport endpoint is not connected), + qr(WSREP: .* sending install message failed: (Transport endpoint|Socket) is not connected), qr(WSREP: .* sending install message failed: Resource temporarily unavailable), - qr(WSREP: Maximum writeset size exceeded by .*), - qr(WSREP: transaction size exceeded.*), - qr(WSREP: RBR event .*), - qr(WSREP: Ignoring error for TO isolated action: .*), - qr(WSREP: transaction size limit .*), - qr(WSREP: rbr write fail, .*), - qr(WSREP: .*Backend not supported: foo.*), - qr(WSREP: .*Failed to initialize backend using .*), - qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at .*), + qr(WSREP: Maximum writeset size exceeded by ), + qr(WSREP: transaction size exceeded), + qr(WSREP: RBR event ), + qr(WSREP: Ignoring error for TO isolated action: ), + qr(WSREP: transaction size limit ), + qr(WSREP: rbr write fail, ), + qr(WSREP: .*Backend not supported: foo), + qr(WSREP: .*Failed to initialize backend using ), + qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at ), qr(WSREP: gcs connect failed: Socket type not supported), qr(WSREP: failed to open gcomm backend connection: 110: failed to reach primary view: 110 .*), - qr(WSREP: .*Failed to open backend connection: -110 .*), - qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at .*), + qr(WSREP: .*Failed to open backend connection: -110 ), + qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at ), qr(WSREP: gcs connect failed: Connection timed out), qr|WSREP: wsrep::connect\(.*\) failed: 7|, - qr(WSREP: SYNC message from member .* in non-primary configuration. Ignored.), + qr(WSREP: SYNC message from member .+ ?in non-primary configuration\. Ignored\.), qr(WSREP: Could not find peer:), - qr(WSREP: TO isolation failed for: .*), - qr|WSREP: gcs_caused\(\) returned .*|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(SYNCED\). Message ignored.|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(JOINED\). Message ignored.|, - qr|WSREP: Unsupported protocol downgrade: incremental data collection disabled. Expect abort.|, + qr(WSREP: TO isolation failed for: ), + qr|WSREP: gcs_caused\(\) returned |, + qr|WSREP: Protocol violation\. JOIN message sender .+ ?is not in state transfer \(SYNCED\)\. Message ignored\.|, + qr|WSREP: Protocol violation\. JOIN message sender .+ ?is not in state transfer \(JOINED\)\. Message ignored\.|, + qr|WSREP: Unsupported protocol downgrade: incremental data collection disabled\. Expect abort\.|, qr(WSREP: Action message in non-primary configuration from member [0-9]*), qr(WSREP: Last Applied Action message in non-primary configuration from member [0-9]*), - qr(WSREP: discarding established .*), - qr|WSREP: .*core_handle_uuid_msg.*|, - qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0. 
WSREP_SYNC_WAIT_BEFORE_READ is on), - qr|WSREP: JOIN message from member .* in non-primary configuration. Ignored.|, - qr|Query apply failed:*|, - qr(WSREP: Ignoring error*), - qr(WSREP: Failed to remove page file .*), - qr(WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to .*), - qr|WSREP: Sending JOIN failed: -107 \(Transport endpoint is not connected\). Will retry in new primary component.|, + qr(WSREP: discarding established ), + qr|WSREP: .*core_handle_uuid_msg|, + qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0\. WSREP_SYNC_WAIT_BEFORE_READ is on), + qr|WSREP: JOIN message from member .+ ?in non-primary configuration\. Ignored\.|, + qr|WSREP: .*Query apply failed:|, + qr(WSREP: Ignoring error), + qr(WSREP: Failed to remove page file ), + qr(WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to ), + qr+WSREP: Sending JOIN failed: -107 \((Transport endpoint|Socket) is not connected\)\. Will retry in new primary component\.+, + qr+WSREP: Send action \{.* STATE_REQUEST\} returned -107 \((Transport endpoint|Socket) is not connected\)+, qr|WSREP: Trying to continue unpaused monitor|, qr|WSREP: Wait for gtid returned error 3 while waiting for prior transactions to commit before setting position|, + qr|WSREP: Failed to report last committed|, ); sub which($) { return `sh -c "command -v $_[0]"` } sub skip_combinations { my %skip = (); - $skip{'include/have_mariabackup.inc'} = 'Need ss' + $skip{'include/have_mariabackup.inc'} = 'Need socket statistics utility' unless which("lsof") || which("sockstat") || which("ss"); %skip; } diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/GAL-501.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/GAL-501.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/GAL-501.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/GAL-501.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,7 +6,7 @@ [mysqld.1] wsrep-cluster-address=gcomm:// wsrep_node_address=[::1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' wsrep_node_incoming_address='[::1]:@mysqld.1.port' bind-address=:: @@ -14,7 +14,7 @@ [mysqld.2] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' wsrep_node_address=[::1] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.2.#sst_port' wsrep_node_incoming_address='[::1]:@mysqld.2.port' bind-address=:: @@ -22,7 +22,7 @@ [mysqld.3] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' wsrep_node_address=[::1] -wsrep_provider_options='base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1]' 
+wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.3.#sst_port' wsrep_node_incoming_address='[::1]:@mysqld.3.port' bind-address=:: diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/GCF-354.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/GCF-354.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/GCF-354.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/GCF-354.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,10 +7,10 @@ wsrep-debug=1 [mysqld.1] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1G;pc.weight=4' +wsrep_provider_options='repl.causal_read_timeout=PT90S;pc.weight=4;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=256M' [mysqld.2] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1G' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=256M' [mysqld.3] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1G' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=256M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/MDEV-36360.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/MDEV-36360.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/MDEV-36360.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/MDEV-36360.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,110 @@ +# +# MDEV-36360: Don't grab table-level X locks for applied inserts. +# +# It prevents a debug crash in wsrep_report_error() which happened when appliers would run +# with FK and UK checks disabled and erroneously execute plain inserts as bulk inserts. +# +# Moreover, in release builds such a behavior could lead to deadlocks between two applier +# threads if a thread waiting for a table-level lock was ordered before the lock holder. +# In that case the lock holder would proceed to commit order and wait forever for the +# now-blocked other applier thread to commit before. +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_debug.inc + +--let $galera_connection_name = node_3 +--let $galera_server_number = 3 +--source include/galera_connect.inc + +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 +--source ../galera/include/auto_increment_offset_save.inc + +# Create parent and child tables. 
+--connection node_1 +CREATE TABLE parent ( + id INT PRIMARY KEY +) ENGINE=InnoDB; + +CREATE TABLE child ( + id INT PRIMARY KEY, + parent_id INT, + KEY (parent_id), + CONSTRAINT FOREIGN KEY (parent_id) REFERENCES parent(id) +) ENGINE=InnoDB; + +# Fill the parent table with rows that will later be used by the child. +INSERT INTO parent VALUES (1), (2); + +# Wait until the rows are replicated on node #3. +--connection node_3 +--let $wait_condition = SELECT COUNT(*) = 2 FROM parent +--source include/wait_condition.inc + +# Delete one row from the parent table on node #3 and rejoin the cluster. +SET SESSION wsrep_on = OFF; +DELETE FROM parent WHERE id = 1; +SET SESSION wsrep_on = ON; +--echo Restarting server 3 with one applier thread having FK and UK checks disabled +--source include/shutdown_mysqld.inc +--let $start_mysqld_params = --wsrep_slave_FK_checks=0 --wsrep_slave_UK_checks=0 +--source ../galera/include/start_mysqld.inc + +# Stop the applier after writing a row into the child table. +SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_after_write_row'; + +# Insert a child row that will be applied on node #3, but should not +# grab table-level X-lock. +--connection node_1 +INSERT INTO child VALUES (1, 1); + +--connection node_3 +SET DEBUG_SYNC = 'now WAIT_FOR sync.wsrep_after_write_row_reached'; +# Now that the applier has hit the global sync point wait, reset it +# so that the upcoming insert avoids it. +SET GLOBAL DEBUG_DBUG = ''; +# Don't wait for applied insert to commit. +SET wsrep_sync_wait = 0; +SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL signal.wsrep_after_write_row'; +# The insert should pass the sync point, as otherwise if the applied insert +# grabs table-level X-lock, they'll both deadlock forever. +INSERT INTO child VALUES (2, 2); +SET DEBUG_SYNC = 'RESET'; + +--let $assert_select = foreign key constraint fails +--let $assert_count = 0 +--let $assert_text = no FK constraint failure +--let $assert_only_after = CURRENT_TEST +--let $assert_file = $MYSQLTEST_VARDIR/log/mysqld.3.err +--source include/assert_grep.inc + +# Child row insert is applied even though there's no parent row. +--echo Server 3 +SELECT COUNT(*) AS EXPECT_1 FROM parent; +SELECT COUNT(*) AS EXPECT_2 FROM child; + +# Check other nodes have both parent and child rows. +--connection node_1 +--echo Server 1 +SET wsrep_sync_wait = 15; +SELECT COUNT(*) AS EXPECT_2 FROM parent; +SELECT COUNT(*) AS EXPECT_2 FROM child; + +--connection node_2 +--echo Server 2 +SET wsrep_sync_wait = 15; +SELECT COUNT(*) AS EXPECT_2 FROM parent; +SELECT COUNT(*) AS EXPECT_2 FROM child; + +DROP TABLE child; +DROP TABLE parent; + +# Restore original auto_increment_offset values. 
+--source ../galera/include/auto_increment_offset_restore.inc + +--source include/galera_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera-features#115.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#115.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera-features#115.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#115.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,4 @@ +!include ../galera_3nodes.cnf + +[mysqld] +wsrep-ignore-apply-errors=0 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera-features#115.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#115.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera-features#115.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#115.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,89 @@ +# +# This test verifies that one successful node wins over two nodes that fail +# for different reasons. +# +--source include/galera_cluster.inc +--source include/have_innodb.inc + +--let $galera_connection_name = node_3 +--let $galera_server_number = 3 +--source include/galera_connect.inc + +--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 +--source suite/galera/include/auto_increment_offset_save.inc + +# create inconsistency on node 2 +--connection node_2 +SET GLOBAL wsrep_on=OFF; +DROP SCHEMA test; + +# create inconsistency on node 3 +--connection node_3 +SET GLOBAL wsrep_on=OFF; +CREATE TABLE t1 (f1 INTEGER); + +--connection node_1 +CREATE TABLE t1 (f1 INTEGER); + +# check that nodes 2 and 3 leave the cluster, and node_1 is Primary by itself + +--connection node_1 +SET SESSION wsrep_sync_wait=0; +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc + +--connection node_2 +SET SESSION wsrep_sync_wait=0; +--let $wait_condition = SELECT VARIABLE_VALUE = 'Disconnected' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +--connection node_3 +SET SESSION wsrep_sync_wait=0; +--let $wait_condition = SELECT VARIABLE_VALUE = 'Disconnected' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +--connection node_1 +# this is a workaround for "sending install message failed" BUG: +# https://github.com/codership/galera/issues/174 +# When it happens, node_1 becomes non-prim +SET GLOBAL wsrep_provider_options='pc.bootstrap=YES'; +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +# restart nodes 2 and 3, since they failed + +--connection node_2 +# need to reinitialize connection due to a "Bad handshake" bug. +# we reconnect using the 'mysql' database as 'test' was dropped.
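# (Editor's aside, not part of the upstream patch.) The 'test' schema was dropped
# on node_2 earlier with wsrep_on=OFF to create the inconsistency, so node_2 no
# longer has a local 'test' database and a reconnect with db=test would fail.
# Reconnecting against the always-present 'mysql' system database sidesteps that;
# the restart below then lets the node rejoin and resynchronize via state transfer.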
+--disconnect node_2 +--connect node_2, 127.0.0.1, root, , mysql, $NODE_MYPORT_2 + --source include/restart_mysqld.inc + +--connection node_3 + --source include/restart_mysqld.inc + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; +--source include/wait_condition.inc +--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + +DROP TABLE test.t1; + +--source suite/galera/include/auto_increment_offset_restore.inc + +--connection node_2 +CALL mtr.add_suppression("Inconsistent by consensus\\."); +CALL mtr.add_suppression("Error_code: 1049"); +CALL mtr.add_suppression("WSREP: Failed to apply trx: source: "); +CALL mtr.add_suppression("WSREP: Failed to apply app buffer"); +CALL mtr.add_suppression("WSREP: Node consistency compromized, leaving cluster\\.\\.\\."); + +--connection node_3 +CALL mtr.add_suppression("Inconsistent by consensus\\."); +CALL mtr.add_suppression("Error_code: 1050"); +CALL mtr.add_suppression("WSREP: Failed to apply trx: source: "); +CALL mtr.add_suppression("WSREP: Failed to apply app buffer"); +CALL mtr.add_suppression("WSREP: Node consistency compromized, leaving cluster\\.\\.\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera-features#119.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#119.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera-features#119.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera-features#119.test 2025-05-19 16:14:24.000000000 +0000 @@ -66,6 +66,5 @@ CALL mtr.add_suppression("WSREP: Node consistency compromized, leaving cluster\\.\\.\\."); CALL mtr.add_suppression("WSREP: Failed to apply write set: "); - # Restore original auto_increment_offset values. 
--source ../galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_2_cluster.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_2_cluster.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,25 +1,34 @@ !include ../galera_2x3nodes.cnf +[mysqld] +wsrep-debug=1 + [mysqld.1] wsrep_gtid_domain_id=1 server-id=11 +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT20S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S;pc.wait_prim_timeout=PT60S;gcache.size=128M;pc.weight=2' [mysqld.2] wsrep_gtid_domain_id=1 server-id=12 +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT20S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S;pc.wait_prim_timeout=PT60S;gcache.size=128M' [mysqld.3] wsrep_gtid_domain_id=1 server-id=13 +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT20S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S;pc.wait_prim_timeout=PT60S;gcache.size=128M' [mysqld.4] wsrep_gtid_domain_id=2 server-id=21 +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.4.#galera_port;evs.suspect_timeout=PT20S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S;pc.wait_prim_timeout=PT60S;gcache.size=128M' [mysqld.5] wsrep_gtid_domain_id=2 server-id=22 +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.5.#galera_port;evs.suspect_timeout=PT20S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S;pc.wait_prim_timeout=PT60S;gcache.size=128M' [mysqld.6] wsrep_gtid_domain_id=2 server-id=23 +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.6.#galera_port;evs.suspect_timeout=PT20S;evs.inactive_timeout=PT30S;evs.install_timeout=PT25S;pc.wait_prim_timeout=PT60S;gcache.size=128M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_2_cluster.combinations mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.combinations --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_2_cluster.combinations 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.combinations 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,5 @@ +[binlogon] +log-bin +log-slave-updates + +[binlogoff] diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_2_cluster.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_2_cluster.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_2_cluster.test 2025-05-19 16:14:24.000000000 +0000 @@ -9,14 +9,17 @@ --source include/big_test.inc --source include/galera_cluster.inc --source include/have_innodb.inc +--source include/force_restart.inc +--connect node_6, 127.0.0.1, root, , test, $NODE_MYPORT_6 --connect node_5, 127.0.0.1, root, , test, $NODE_MYPORT_5 - --connect node_4, 127.0.0.1, root, , test, $NODE_MYPORT_4 + --connection node_4 ---replace_result $NODE_MYPORT_1 NODE_MYPORT_1 +--disable_query_log --eval CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_port=$NODE_MYPORT_1, master_use_gtid=current_pos; +--enable_query_log START SLAVE; --source 
include/wait_for_slave_to_start.inc @@ -42,7 +45,6 @@ SELECT COUNT(*) = 1 FROM t1; ---connect node_6, 127.0.0.1, root, , test, $NODE_MYPORT_6 --connection node_6 SELECT COUNT(*) = 1 FROM t1; @@ -81,23 +83,46 @@ # --connection node_2 +--let $wsrep_last_committed_before_2 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'` + +--connection node_1 +--let $wsrep_last_committed_before_1 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'` + +--connection node_3 +--let $wsrep_last_committed_before_3 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'` + +--connection node_4 +--let $wsrep_last_committed_before_4 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'` ---let $wsrep_last_committed_before = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'` +--connection node_5 +--let $wsrep_last_committed_before_5 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'` + +--connection node_6 +--let $wsrep_last_committed_before_6 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'` + +--connection node_2 OPTIMIZE TABLE t1; +--let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before_2 + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' +--source include/wait_condition.inc --connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before_1 + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' +--source include/wait_condition.inc ---let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' +--connection node_3 +--let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before_3 + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' --source include/wait_condition.inc --connection node_4 +--let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before_4 + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' +--source include/wait_condition.inc ---let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' +--connection node_5 +--let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before_5 + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' --source include/wait_condition.inc --connection node_6 - ---let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' +--let $wait_condition = SELECT VARIABLE_VALUE >= $wsrep_last_committed_before_6 + 1 FROM INFORMATION_SCHEMA.SESSION_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed' --source include/wait_condition.inc # @@ -115,6 +140,7 @@ SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; +--source include/wait_until_ready.inc SET GLOBAL GTID_SLAVE_POS=""; --connection node_1 @@ -122,35 +148,56 @@ SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; +--source 
include/wait_until_ready.inc --connection node_2 SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; - -CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +--source include/wait_until_ready.inc --connection node_3 SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; - -CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +--source include/wait_until_ready.inc --connection node_5 SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; - -CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +--source include/wait_until_ready.inc --connection node_6 SET GLOBAL wsrep_on = OFF; RESET MASTER; SET GLOBAL wsrep_on = ON; +--source include/wait_until_ready.inc + +connection node_1; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); + +connection node_2; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); + +connection node_3; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); + +connection node_4; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); + +connection node_5; +CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); +connection node_6; CALL mtr.add_suppression("Ignoring server id .* for non bootstrap node"); +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since "); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_allowlist.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_allowlist.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_allowlist.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_allowlist.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -7,7 +7,7 @@ wsrep_allowlist="127.0.0.1,127.0.0.2,127.0.0.3" [mysqld.2] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=127.0.0.2;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=127.0.0.2;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' # Variable is only used on bootstrap node, so this will be ignored wsrep_allowlist="127.0.0.1,127.0.0.2,127.0.0.3,127.0.0.4,127.0.0.5" @@ -18,9 +18,9 @@ wsrep_sst_receive_address='127.0.0.2:@mysqld.2.#sst_port' [mysqld.3] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=127.0.0.3;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.ignore_quorum=TRUE;pc.wait_prim=FALSE' 
+wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=127.0.0.3;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.ignore_quorum=TRUE;pc.wait_prim=FALSE;gcache.size=10M' wsrep_node_address=127.0.0.3 wsrep_sst_receive_address=127.0.0.3:@mysqld.3.#sst_port wsrep_node_incoming_address=127.0.0.3:@mysqld.3.port -wsrep_sst_receive_address='127.0.0.3:@mysqld.3.#sst_port' \ No newline at end of file +wsrep_sst_receive_address='127.0.0.3:@mysqld.3.#sst_port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_certification_ccc.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_certification_ccc.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_certification_ccc.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_certification_ccc.test 2025-05-19 16:14:24.000000000 +0000 @@ -50,4 +50,3 @@ --source ../galera/include/auto_increment_offset_restore.inc --source include/galera_end.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_duplicate_primary_value.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_duplicate_primary_value.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_duplicate_primary_value.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_duplicate_primary_value.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/galera_cluster.inc +--source include/have_innodb.inc --source include/have_debug.inc --source include/have_debug_sync.inc --source include/big_test.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,12 +5,12 @@ [mysqld.1] wsrep_node_name='node.1' -wsrep_provider_options='base_port=@mysqld.1.#galera_port;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;socket.dynamic=true' +wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;socket.dynamic=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] wsrep_node_name='node.2' -wsrep_provider_options='base_port=@mysqld.2.#galera_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.3] wsrep_node_name='node.3' -wsrep_provider_options='base_port=@mysqld.3.#galera_port;socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;socket.dynamic=true' 
+wsrep_provider_options='socket.ssl=yes;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/galera-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/galera-key.pem;socket.dynamic=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_dynamic_protocol.test 2025-05-19 16:14:24.000000000 +0000 @@ -14,7 +14,6 @@ --let $node_3 = node_3 --source ../galera/include/auto_increment_offset_save.inc - --connection node_1 --let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment'; --source include/wait_condition.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_evs_suspect_timeout.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_evs_suspect_timeout.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_evs_suspect_timeout.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_evs_suspect_timeout.test 2025-05-19 16:14:24.000000000 +0000 @@ -87,5 +87,6 @@ --source include/wait_condition.inc DROP TABLE t1; + # Restore original auto_increment_offset values. --source ../galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_garbd.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_garbd.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_garbd.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_garbd.test 2025-05-19 16:14:24.000000000 +0000 @@ -9,14 +9,9 @@ --source include/big_test.inc # Save galera ports ---connection node_1 --source suite/galera/include/galera_base_port.inc --let $NODE_GALERAPORT_1 = $_NODE_GALERAPORT ---connection node_2 ---source suite/galera/include/galera_base_port.inc ---let $NODE_GALERAPORT_2 = $_NODE_GALERAPORT - --let $galera_connection_name = node_3 --let $galera_server_number = 3 --source include/galera_connect.inc @@ -81,10 +76,10 @@ # Workaround for galera#101 --connection node_1 -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); --connection node_2 -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); --connection node_3 -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. 
Message ignored\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test 2025-05-19 16:14:24.000000000 +0000 @@ -10,11 +10,9 @@ --source include/have_debug.inc --source include/have_debug_sync.inc ---connection node_1 -# Save original auto_increment_offset values. ---let $node_1=node_1 ---let $node_2=node_2 ---let $node_3=node_3 +# Save galera ports +--source suite/galera/include/galera_base_port.inc +--let $NODE_GALERAPORT_1 = $_NODE_GALERAPORT --let $galera_connection_name = node_3 --let $galera_server_number = 3 @@ -22,12 +20,13 @@ --source suite/galera/include/galera_base_port.inc --let $NODE_GALERAPORT_3 = $_NODE_GALERAPORT +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 --source ../galera/include/auto_increment_offset_save.inc -# Save galera ports --connection node_1 ---source suite/galera/include/galera_base_port.inc ---let $NODE_GALERAPORT_1 = $_NODE_GALERAPORT --let $datadir= `SELECT @@datadir` --let $innodb_max_dirty_pages_pct = `SELECT @@innodb_max_dirty_pages_pct` @@ -41,10 +40,6 @@ CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; - ---connection node_2 ---source suite/galera/include/galera_base_port.inc ---let $NODE_GALERAPORT_2 = $_NODE_GALERAPORT --echo Killing node #3 to free ports for garbd ... --connection node_3 @@ -124,13 +119,16 @@ --eval SET GLOBAL innodb_max_dirty_pages_pct_lwm = $innodb_max_dirty_pages_pct_lwm --enable_query_log +# Restore original auto_increment_offset values. --source ../galera/include/auto_increment_offset_restore.inc +# Workaround for galera#101 + --connection node_1 -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); --connection node_2 -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. Message ignored\\."); --connection node_3 -CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0 (.*) is not in state transfer \\(SYNCED\\)"); +CALL mtr.add_suppression("WSREP: Protocol violation\\. JOIN message sender 1\\.0( \\(.*\\))? is not in state transfer \\(SYNCED\\)\\. 
Message ignored\\."); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,7 @@ # following tests such as galera_3nodes.galera_var_dirty_reads2 !include ../galera_2x3nodes.cnf + [mysqld.1] wsrep_gtid_domain_id=1 server-id=11 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_gtid_2_cluster.test 2025-05-19 16:14:24.000000000 +0000 @@ -42,8 +42,10 @@ SHOW STATUS LIKE 'wsrep_cluster_size'; #--disable_parsing --connection node_1 ---replace_result $NODE_MYPORT_4 NODE_MYPORT_4 +--echo --- ignore_server_ids=(12,13) +--disable_query_log --eval change master to master_host='127.0.0.1', master_user='root', master_port=$NODE_MYPORT_4, master_use_gtid=current_pos, ignore_server_ids=(12,13); +--enable_query_log start slave; --source include/wait_for_slave_to_start.inc select @@gtid_binlog_state; @@ -51,8 +53,10 @@ #--query_vertical SHOW SLAVE STATUS; --connection node_4 ---replace_result $NODE_MYPORT_1 NODE_MYPORT_1 +--echo --- ignore_server_ids=(22,23) +--disable_query_log --eval change master to master_host='127.0.0.1', master_user='root', master_port=$NODE_MYPORT_1, master_use_gtid=current_pos, ignore_server_ids=(22,23); +--enable_query_log start slave; --source include/wait_for_slave_to_start.inc select @@gtid_binlog_state; @@ -73,6 +77,8 @@ --echo cluster 2 node 1 --connection node_4 +--let $wait_condition = SELECT COUNT(*) = 1 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (2, 21, 1); select @@gtid_binlog_state; @@ -81,11 +87,16 @@ --source include/save_master_gtid.inc --connection node_4 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 2 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo cluster 1 node 2 --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 2 FROM test.t1; +--source include/wait_condition.inc + select @@gtid_binlog_state; insert into t1 values (1, 12, 3); select @@gtid_binlog_state; @@ -95,10 +106,14 @@ --source include/save_master_gtid.inc --connection node_4 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 3 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo cluster 1 node 3 --connection node_3 +--let $wait_condition = SELECT COUNT(*) = 3 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (1, 13, 4); select @@gtid_binlog_state; @@ -108,10 +123,14 @@ --source include/save_master_gtid.inc --connection node_4 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 4 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo cluster 2 node 2 --connection node_5 +--let $wait_condition = SELECT COUNT(*) = 4 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (2, 22, 2); 
select @@gtid_binlog_state; @@ -121,37 +140,55 @@ --source include/save_master_gtid.inc --connection node_1 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 5 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo cluster 2 node 3 --connection node_6 +--let $wait_condition = SELECT COUNT(*) = 5 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (2, 23, 3); select @@gtid_binlog_state; --echo #wait for sync cluster 2 and 1 --connection node_4 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc --source include/save_master_gtid.inc --connection node_1 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo # check other nodes are consistent --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --connection node_3 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --connection node_5 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --connection node_6 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --echo cluster 1 node 1 --connection node_1 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; drop table t1; stop slave; @@ -210,8 +247,10 @@ # Then we will kill node D and set up the replication between A and E # To see whether fail over works or not. 
--connection node_1 ---replace_result $NODE_MYPORT_6 NODE_MYPORT_6 +--echo --- ignore_server_ids=(12,13) +--disable_query_log --eval change master to master_host='127.0.0.1', master_user='root', master_port=$NODE_MYPORT_6, master_use_gtid=current_pos, ignore_server_ids=(12,13); +--enable_query_log start slave; --source include/wait_for_slave_to_start.inc select @@gtid_binlog_state; @@ -219,8 +258,10 @@ #--query_vertical SHOW SLAVE STATUS; --connection node_4 ---replace_result $NODE_MYPORT_3 NODE_MYPORT_3 +--echo --- ignore_server_ids=(22,23) +--disable_query_log --eval change master to master_host='127.0.0.1', master_user='root', master_port=$NODE_MYPORT_3, master_use_gtid=current_pos, ignore_server_ids=(22,23); +--enable_query_log start slave; --source include/wait_for_slave_to_start.inc select @@gtid_binlog_state; @@ -242,6 +283,8 @@ --sleep 2 --echo cluster 2 node 1 --connection node_4 +--let $wait_condition = SELECT COUNT(*) = 1 FROM test.t1; +--source include/wait_condition.inc insert into t1 values (2, 21, 1); select @@gtid_binlog_state; @@ -250,11 +293,16 @@ --source include/save_master_gtid.inc --connection node_4 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 2 FROM test.t1; +--source include/wait_condition.inc + select * from t1 order by 1, 2, 3; --echo cluster 1 node 2 --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 2 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (1, 12, 3); select @@gtid_binlog_state; @@ -264,10 +312,14 @@ --source include/save_master_gtid.inc --connection node_4 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 3 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo cluster 1 node 3 --connection node_3 +--let $wait_condition = SELECT COUNT(*) = 3 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (1, 13, 4); select @@gtid_binlog_state; @@ -277,10 +329,14 @@ --source include/save_master_gtid.inc --connection node_4 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 4 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo cluster 2 node 2 --connection node_5 +--let $wait_condition = SELECT COUNT(*) = 4 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (2, 22, 2); select @@gtid_binlog_state; @@ -290,10 +346,14 @@ --source include/save_master_gtid.inc --connection node_1 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 5 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo cluster 2 node 3 --connection node_6 +--let $wait_condition = SELECT COUNT(*) = 5 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; insert into t1 values (2, 23, 3); select @@gtid_binlog_state; @@ -303,24 +363,36 @@ --source include/save_master_gtid.inc --connection node_1 --source include/sync_with_master_gtid.inc +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select * from t1 order by 1, 2, 3; --echo # check other nodes are consistent --connection node_2 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --connection node_3 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; 
+--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --connection node_5 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --connection node_6 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; select * from t1 order by 1, 2, 3; --echo cluster 1 node 1 --connection node_1 +--let $wait_condition = SELECT COUNT(*) = 6 FROM test.t1; +--source include/wait_condition.inc select @@gtid_binlog_state; drop table t1; stop slave; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -6,7 +6,7 @@ [mysqld.1] wsrep-cluster-address=gcomm:// -wsrep_provider_options='base_host=[::1];base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.1.port' @@ -15,7 +15,7 @@ [mysqld.2] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.2.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.2.port' @@ -25,7 +25,7 @@ [mysqld.3] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1]:@mysqld.3.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.3.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.3.port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup_section.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup_section.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup_section.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mariabackup_section.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -9,7 +9,7 @@ [mysqld.1] wsrep-cluster-address=gcomm:// 
-wsrep_provider_options='base_host=[::1];base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.1.port' @@ -18,7 +18,7 @@ [mysqld.2] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.2.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.2.port' @@ -28,7 +28,7 @@ [mysqld.3] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1]:@mysqld.3.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.3.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.3.port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,7 +5,7 @@ [mysqld.1] wsrep-cluster-address=gcomm:// -wsrep_provider_options='base_host=[::1];base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.1.port' @@ -13,7 +13,7 @@ [mysqld.2] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.2.#sst_port' 
wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.2.port' @@ -21,7 +21,7 @@ [mysqld.3] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1]:@mysqld.3.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.3.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.3.port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_mysqldump.test 2025-05-19 16:14:24.000000000 +0000 @@ -20,7 +20,6 @@ CREATE USER 'sst'; GRANT ALL PRIVILEGES ON *.* TO 'sst'; ---let $wsrep_sst_auth_orig = `SELECT @@wsrep_sst_auth` SET GLOBAL wsrep_sst_auth = 'sst:'; --connection node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,7 +5,7 @@ [mysqld.1] wsrep-cluster-address=gcomm:// -wsrep_provider_options='base_host=[::1];base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.1.port' @@ -13,7 +13,7 @@ [mysqld.2] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.2.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.2.port' @@ -21,7 +21,7 @@ [mysqld.3] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1]:@mysqld.3.#ist_port' 
+wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.3.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.3.port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync_section.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync_section.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync_section.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ipv6_rsync_section.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -10,7 +10,7 @@ [mysqld.1] wsrep-cluster-address=gcomm:// -wsrep_provider_options='base_host=[::1];base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1]:@mysqld.1.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.1.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.1.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.1.port' @@ -18,7 +18,7 @@ [mysqld.2] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1]:@mysqld.2.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.2.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.2.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.2.port' @@ -26,7 +26,7 @@ [mysqld.3] wsrep_cluster_address='gcomm://[::1]:@mysqld.1.#galera_port' -wsrep_provider_options='base_host=[::1];base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1]:@mysqld.3.#ist_port' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;gmcast.listen_addr=tcp://[::]:@mysqld.3.#galera_port;ist.recv_addr=[::1];evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' wsrep_sst_receive_address='[::1]:@mysqld.3.#sst_port' wsrep_node_address=::1 wsrep_node_incoming_address='[::1]:@mysqld.3.port' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ist_gcache_rollover.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ !include ../galera_3nodes.cnf [mysqld.1] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.ignore_sb=true;gcache.size=1M' 
+wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1M' auto_increment_increment=1 auto_increment_offset=1 # this will force server restarts before this test @@ -9,14 +9,14 @@ wsrep-debug=1 [mysqld.2] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.ignore_sb=true;gcache.size=1M' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1M' auto_increment_increment=2 auto_increment_offset=2 loose-galera-ist-gcache-rollover=2 wsrep-debug=1 [mysqld.3] -wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.ignore_sb=true;gcache.size=1M' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=1M' auto_increment_increment=3 auto_increment_offset=3 loose-galera-ist-gcache-rollover=3 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_A.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_A.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_A.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_A.test 2025-05-19 16:14:24.000000000 +0000 @@ -259,12 +259,12 @@ DROP TABLE t1; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --connection node_2 -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --connection node_3 -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --source ../galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_B.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_B.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_B.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_B.test 2025-05-19 16:14:24.000000000 +0000 @@ -270,13 +270,13 @@ DROP TABLE t1; -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --connection node_2 -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --connection node_3 -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer 
required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --disconnect node_1a diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_C.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_C.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_C.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_join_with_cc_C.test 2025-05-19 16:14:24.000000000 +0000 @@ -295,13 +295,13 @@ DROP TABLE t1; -call mtr.add_suppression("WSREP: Send action {(.*), STATE_REQUEST} returned -107 \\(Transport endpoint is not connected\\)"); -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Send action {.* STATE_REQUEST} returned -107 \\((Transport endpoint|Socket) is not connected\\)"); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --connection node_2 -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --connection node_3 -call mtr.add_suppression("WSREP: Rejecting JOIN message from (.*): new State Transfer required\\."); +call mtr.add_suppression("WSREP: Rejecting JOIN message from .+: new State Transfer required\\."); --source ../galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_parallel_apply_3nodes.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_parallel_apply_3nodes.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_parallel_apply_3nodes.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_parallel_apply_3nodes.test 2025-05-19 16:14:24.000000000 +0000 @@ -65,7 +65,7 @@ --connection node_3 SELECT f1 = 111 FROM t1; -SELECT COUNT(*) IN (1, 2) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND STATE LIKE '%committed%'; +SELECT COUNT(*) IN (1, 2) FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'system user' AND (STATE LIKE '%committed%' OR STATE LIKE 'Waiting for certification'); SET GLOBAL wsrep_slave_threads = DEFAULT; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_pc_bootstrap.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_pc_bootstrap.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_pc_bootstrap.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_pc_bootstrap.test 2025-05-19 16:14:24.000000000 +0000 @@ -17,7 +17,6 @@ --let $node_1 = node_1 --let $node_2 = node_2 --let $node_3 = node_3 - --source ../galera/include/auto_increment_offset_save.inc --connection node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_pc_weight.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_pc_weight.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_pc_weight.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_pc_weight.test 2025-05-19 16:14:24.000000000 +0000 @@ -132,11 +132,11 @@ --connection node_2 CALL mtr.add_suppression('SYNC message from member'); -CALL mtr.add_suppression('user message in state LEAVING'); -CALL mtr.add_suppression('sending install message failed: (Transport endpoint is not connected|Socket is not 
connected)'); -CALL mtr.add_suppression('overriding reported weight for'); +CALL mtr.add_suppression('WSREP: user message in state LEAVING'); +CALL mtr.add_suppression('sending install message failed: (Transport endpoint|Socket) is not connected'); +CALL mtr.add_suppression('overriding reported weight for '); --connection node_3 CALL mtr.add_suppression('WSREP: user message in state LEAVING'); -CALL mtr.add_suppression('sending install message failed: (Transport endpoint is not connected|Socket is not connected)'); -CALL mtr.add_suppression('overriding reported weight for'); +CALL mtr.add_suppression('sending install message failed: (Transport endpoint|Socket) is not connected'); +CALL mtr.add_suppression('overriding reported weight for '); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_safe_to_bootstrap.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_safe_to_bootstrap.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_safe_to_bootstrap.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_safe_to_bootstrap.test 2025-05-19 16:14:24.000000000 +0000 @@ -14,7 +14,6 @@ --let $node_1 = node_1 --let $node_2 = node_2 --let $node_3 = node_3 - --source ../galera/include/auto_increment_offset_save.inc --connection node_1 @@ -195,7 +194,7 @@ CALL mtr.add_suppression("Plugin 'wsrep' registration as a STORAGE ENGINE failed\\."); CALL mtr.add_suppression("Plugin 'wsrep' registration as a FUNCTION failed\\."); CALL mtr.add_suppression("Failed to initialize plugins\\."); -CALL mtr.add_suppression("WSREP: gcs/src/gcs_core.cpp:core_handle_uuid_msg\\(\\)"); +CALL mtr.add_suppression("WSREP: gcs/src/gcs_core\\.cpp:core_handle_uuid_msg\\(\\)"); --connection node_3 CALL mtr.add_suppression("WSREP: no nodes coming from prim view, prim not possible"); @@ -210,7 +209,7 @@ CALL mtr.add_suppression("Plugin 'wsrep' registration as a STORAGE ENGINE failed\\."); CALL mtr.add_suppression("Plugin 'wsrep' registration as a FUNCTION failed\\."); CALL mtr.add_suppression("Failed to initialize plugins\\."); -CALL mtr.add_suppression("WSREP: gcs/src/gcs_core.cpp:core_handle_uuid_msg\\(\\)"); +CALL mtr.add_suppression("WSREP: gcs/src/gcs_core\\.cpp:core_handle_uuid_msg\\(\\)"); SHOW CREATE TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ssl_reload.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ssl_reload.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_ssl_reload.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_ssl_reload.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,10 +5,10 @@ loose-galera-ssl-reload=1 [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;socket.ssl=yes;socket.ssl_ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/client-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/client-key.pem' +wsrep_provider_options='socket.ssl=yes;socket.ssl_ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/client-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/client-key.pem;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] 
-wsrep_provider_options='base_port=@mysqld.2.#galera_port;socket.ssl=yes;socket.ssl_ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/client-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/client-key.pem' +wsrep_provider_options='socket.ssl=yes;socket.ssl_ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/client-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/client-key.pem;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.3] -wsrep_provider_options='base_port=@mysqld.3.#galera_port;socket.ssl=yes;socket.ssl_ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/client-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/client-key.pem' +wsrep_provider_options='socket.ssl=yes;socket.ssl_ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem;socket.ssl_cert=@ENV.MYSQL_TEST_DIR/std_data/client-cert.pem;socket.ssl_key=@ENV.MYSQL_TEST_DIR/std_data/client-key.pem;repl.causal_read_timeout=PT90S;base_port=@mysqld.3.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,4 @@ +!include ../galera_3nodes.cnf + +[mysqld.2] +wsrep_sst_donor=node1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_sst_donor_non_prim.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,64 @@ +# +# Construct a situation where the Donor node partitions in the +# middle of SST. The Donor should stay in non-Primary state instead of +# crashing on an assertion in wsrep-lib. +# +# In the test, node_2 is restarted and node_1 is configured to be +# the donor. Node_1 execution is stopped before sst_sent() is +# called and node_1 is made to partition from the cluster.
+# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/big_test.inc + +--let $galera_connection_name = node_3 +--let $galera_server_number = 3 +--source include/galera_connect.inc + +--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 +--source ../galera/include/auto_increment_offset_save.inc + +--connection node_2 +--source include/shutdown_mysqld.inc +--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat + +--connection node_1 +SET GLOBAL debug_dbug = '+d,sync.wsrep_sst_donor_after_donation'; + +--connection node_2 +--source include/start_mysqld.inc + +--connection node_1 +SET DEBUG_SYNC = 'now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached'; +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=1'; +SET SESSION wsrep_sync_wait=0; +--let $wait_condition = SELECT VARIABLE_VALUE = 'non-Primary' FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_status' +--source include/wait_condition.inc + +SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_sst_donor_after_donation_continue'; +SET DEBUG_SYNC = 'RESET'; +SET GLOBAL debug_dbug = ''; + +SET GLOBAL wsrep_provider_options = 'gmcast.isolate=0'; +SET SESSION wsrep_sync_wait=15; + +--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--connection node_1 +--source include/wait_condition.inc +--connection node_2 +--source include/wait_condition.inc +--connection node_3 + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_ready' +--source include/wait_condition.inc + +--source ../galera/include/auto_increment_offset_restore.inc + +--connection node_1 +CALL mtr.add_suppression("WSREP: sst sent called when not SST donor, state CONNECTED"); +CALL mtr.add_suppression("WSREP: .* returned an error: Not connected to Primary Component"); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_vote_rejoin_mysqldump.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_vote_rejoin_mysqldump.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_vote_rejoin_mysqldump.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_vote_rejoin_mysqldump.test 2025-05-19 16:14:24.000000000 +0000 @@ -69,7 +69,6 @@ --connection node_2 SHOW CREATE TABLE t1; -CALL mtr.add_suppression("is inconsistent with group"); --connection node_3 SHOW CREATE TABLE t1; @@ -83,6 +82,7 @@ # restart node so we don't fail on WSREP_START_POSITION internal check --source include/restart_mysqld.inc --source include/wait_until_connected_again.inc +CALL mtr.add_suppression("WSREP: .+ is inconsistent with group"); --connection node_1 --let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema.test 2025-05-19 16:14:24.000000000 +0000 @@ -9,6 +9,7 @@ --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 --connection node_1 + # Save original auto_increment_offset values. 
--let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema_init.test mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema_init.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema_init.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/galera_wsrep_schema_init.test 2025-05-19 16:14:24.000000000 +0000 @@ -10,6 +10,7 @@ --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 --connection node_1 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 @@ -55,4 +56,3 @@ SELECT cluster_uuid = (SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_state_uuid') FROM mysql.wsrep_cluster_members; --source ../galera/include/auto_increment_offset_restore.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/inconsistency_shutdown.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/inconsistency_shutdown.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes/t/inconsistency_shutdown.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes/t/inconsistency_shutdown.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -5,5 +5,4 @@ wsrep-ignore-apply-errors=0 [ENV] -galera_cluster_size = 3 - +galera_cluster_size=3 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/r/MDEV-26707.result 2025-05-19 16:14:24.000000000 +0000 @@ -2,7 +2,7 @@ connection node_1; connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; -connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; connect node_3a, 127.0.0.1, root, , test, $NODE_MYPORT_3; connection node_1; connection node_2; @@ -45,7 +45,7 @@ SET SESSION wsrep_sync_wait = DEFAULT; SET DEBUG_SYNC = 'now SIGNAL continue'; connection node_2; -ERROR HY000: Got error 6 "No such device or address" during COMMIT +ERROR HY000: Error while appending streaming replication fragment(provider status: Not connected to Primary Component) connection node_2a; SET DEBUG_SYNC = 'RESET'; connection node_1a; @@ -74,15 +74,15 @@ SET SESSION wsrep_sync_wait = 0; SET SESSION wsrep_sync_wait = DEFAULT; connection node_1a; -SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; EXPECT_0 0 connection node_2a; -SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; EXPECT_0 0 connection node_3a; -SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; EXPECT_0 0 connection node_1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/r/galera_sr_kill_slave_before_apply.result mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/r/galera_sr_kill_slave_before_apply.result --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/r/galera_sr_kill_slave_before_apply.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/r/galera_sr_kill_slave_before_apply.result 2025-05-19 16:14:24.000000000 +0000 @@ -44,7 +44,7 @@ SELECT 
COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; EXPECT_0 0 -call mtr.add_suppression("WSREP: node uuid:.*"); +call mtr.add_suppression("WSREP: node uuid:"); connection node_1; DROP TABLE t1; DROP TABLE t2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/suite.pm mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/suite.pm --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/suite.pm 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/suite.pm 2025-05-19 16:14:24.000000000 +0000 @@ -9,38 +9,39 @@ push @::global_suppressions, ( - qr(WSREP: wsrep_sst_receive_address is set to '127.0.0.1), - qr(WSREP: Could not open saved state file for reading: .*), - qr(WSREP: Could not open state file for reading: .*), - qr(WSREP: Gap in state sequence. Need state transfer.), + qr(WSREP: wsrep_sst_receive_address is set to '127\.0\.0\.1), + qr(WSREP: Could not open saved state file for reading: ), + qr(WSREP: Could not open state file for reading: ), + qr(WSREP: Gap in state sequence\. Need state transfer\.), qr(WSREP: Failed to prepare for incremental state transfer:), - qr(WSREP:.*down context.*), + qr(WSREP: .*down context.*), qr(WSREP: Failed to send state UUID:), qr(WSREP: last inactive check more than .* skipping check), qr(WSREP: SQL statement was ineffective), - qr(WSREP: Releasing seqno [0-9]* before [0-9]* was assigned.), - qr|WSREP: access file\(.*gvwstate.dat\) failed\(No such file or directory\)|, + qr(WSREP: Releasing seqno [0-9]+ before [0-9]+ was assigned\.), + qr|WSREP: access file\(.*gvwstate.dat\) failed ?\(No such file or directory\)|, qr(WSREP: Quorum: No node with complete state), qr(WSREP: Initial position was provided by configuration or SST, avoiding override), - qr|WSREP: discarding established \(time wait\) .*|, - qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside. Will use that one.), + qr|WSREP: discarding established \(time wait\) |, + qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside\. Will use that one\.), qr(WSREP: evs::proto.*), - qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:.*|, + qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:|, qr(WSREP: no nodes coming from prim view, prim not possible), - qr(WSREP: Member .* requested state transfer from .* but it is impossible to select State Transfer donor: Resource temporarily unavailable), + qr(WSREP: Member .+ ?requested state transfer from .+ but it is impossible to select State Transfer donor: Resource temporarily unavailable), qr(WSREP: user message in state LEAVING), - qr(WSREP: .* sending install message failed: Transport endpoint is not connected), + qr(WSREP: .* sending install message failed: (Transport endpoint|Socket) is not connected), qr(WSREP: .* sending install message failed: Resource temporarily unavailable), - qr(WSREP: Sending JOIN failed: -107 \(Transport endpoint is not connected\). Will retry in new primary component.), qr(WSREP: Could not find peer:), - qr|WSREP: gcs_caused\(\) returned .*|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(SYNCED\). Message ignored.|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(JOINED\). Message ignored.|, + qr|WSREP: gcs_caused\(\) returned |, + qr|WSREP: Protocol violation\. 
JOIN message sender .+ ?is not in state transfer \(SYNCED\)\. Message ignored\.|, + qr|WSREP: Protocol violation\. JOIN message sender .+ ?is not in state transfer \(JOINED\)\. Message ignored\.|, qr(WSREP: Action message in non-primary configuration from member [0-9]*), qr(WSREP: Last Applied Action message in non-primary configuration from member [0-9]*), - qr|WSREP: .*core_handle_uuid_msg.*|, - qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0. WSREP_SYNC_WAIT_BEFORE_READ is on), - qr(WSREP: JOIN message from member .* in non-primary configuration. Ignored.), + qr|WSREP: .*core_handle_uuid_msg|, + qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0\. WSREP_SYNC_WAIT_BEFORE_READ is on), + qr|WSREP: JOIN message from member .+ ?in non-primary configuration\. Ignored\.|, + qr+WSREP: Sending JOIN failed: -107 \((Transport endpoint|Socket) is not connected\)\. Will retry in new primary component\.+, + qr+WSREP: Send action \{.* STATE_REQUEST\} returned -107 \((Transport endpoint|Socket) is not connected\)+, ); bless { }; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/GCF-606.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-606.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/GCF-606.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-606.test 2025-05-19 16:14:24.000000000 +0000 @@ -85,4 +85,5 @@ --connection node_2 CALL mtr.add_suppression("WSREP: failed to send SR rollback for "); + --source ../galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/GCF-817.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-817.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/GCF-817.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-817.test 2025-05-19 16:14:24.000000000 +0000 @@ -5,6 +5,7 @@ --source include/galera_cluster.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/GCF-832.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-832.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/GCF-832.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/GCF-832.test 2025-05-19 16:14:24.000000000 +0000 @@ -7,6 +7,7 @@ --source include/force_restart.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/MDEV-26707.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/MDEV-26707.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/MDEV-26707.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/MDEV-26707.test 2025-05-19 16:14:24.000000000 +0000 @@ -21,7 +21,7 @@ --connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 --connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 ---connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 --connect node_3a, 127.0.0.1, root, , test, $NODE_MYPORT_3 # Save original auto_increment_offset values. 
@@ -158,15 +158,15 @@ --connection node_1a --let $wait_condition = SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log --source include/wait_condition.inc -SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; --connection node_2a --let $wait_condition = SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log --source include/wait_condition.inc -SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; --connection node_3a --let $wait_condition = SELECT COUNT(*) = 0 FROM mysql.wsrep_streaming_log --source include/wait_condition.inc -SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; +SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; --connection node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_isolate_master.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_isolate_master.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_isolate_master.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_isolate_master.test 2025-05-19 16:14:24.000000000 +0000 @@ -6,6 +6,7 @@ # Test the effect of gmcast.isolate on master during an SR transaction # --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_join_slave.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_join_slave.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_join_slave.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_join_slave.test 2025-05-19 16:14:24.000000000 +0000 @@ -9,6 +9,7 @@ --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 --connection node_1 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_master.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_master.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_master.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_master.test 2025-05-19 16:14:24.000000000 +0000 @@ -6,6 +6,7 @@ --source include/have_innodb.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback.test 2025-05-19 16:14:24.000000000 +0000 @@ -9,6 +9,7 @@ --source include/have_innodb.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. 
--let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback2.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback2.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback2.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_after_apply_rollback2.test 2025-05-19 16:14:24.000000000 +0000 @@ -8,6 +8,7 @@ --source include/have_innodb.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 @@ -65,4 +66,5 @@ --connection node_1 --disconnect node_1a DROP TABLE t1; + --source ../galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_before_apply.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_before_apply.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_before_apply.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_kill_slave_before_apply.test 2025-05-19 16:14:24.000000000 +0000 @@ -9,6 +9,7 @@ --source include/force_restart.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. --let $node_1=node_1 --let $node_2=node_2 @@ -88,7 +89,7 @@ --connection node_2 SELECT COUNT(*) AS EXPECT_0 FROM mysql.wsrep_streaming_log; # As noted above sometimes node delivers the same view twice -call mtr.add_suppression("WSREP: node uuid:.*"); +call mtr.add_suppression("WSREP: node uuid:"); --connection node_1 DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.cnf mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.cnf --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,4 @@ !include ../galera_3nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.weight=3' - +wsrep_provider_options='pc.weight=3;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.test mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.test --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_sr_threeway_split.test 2025-05-19 16:14:24.000000000 +0000 @@ -7,6 +7,7 @@ --source include/have_innodb.inc --connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + # Save original auto_increment_offset values. 
--let $node_1=node_1 --let $node_2=node_2 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr-master.opt mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr-master.opt --- mariadb-10.11.11/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr-master.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_3nodes_sr/t/galera_vote_sr-master.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,2 +1 @@ --wsrep-ignore-apply-errors=0 - diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/disabled.def mariadb-10.11.13/mysql-test/suite/galera_sr/disabled.def --- mariadb-10.11.11/mysql-test/suite/galera_sr/disabled.def 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/disabled.def 2025-05-19 16:14:24.000000000 +0000 @@ -9,7 +9,3 @@ # Do not use any TAB characters for whitespace. # ############################################################################## - -GCF-1060 : MDEV-32160 GCF-1060 test failure due to wsrep MDL conflict -# Links to below failures in MDEV-30172 -MDEV-25718 : timeout related to wsrep_sync_wait and DEBUG_SYNC diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/r/MENT-2042.result mariadb-10.11.13/mysql-test/suite/galera_sr/r/MENT-2042.result --- mariadb-10.11.11/mysql-test/suite/galera_sr/r/MENT-2042.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/r/MENT-2042.result 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,9 @@ +connection node_2; +connection node_1; +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); +XA START 'a'; +ERROR 42000: This version of MariaDB doesn't yet support 'XA transactions with Galera replication' +DROP TABLE t1; +disconnect node_1a; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/r/galera_sr_cc_master.result mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_cc_master.result --- mariadb-10.11.11/mysql-test/suite/galera_sr/r/galera_sr_cc_master.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_cc_master.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ connection node_2; connection node_1; -CALL mtr.add_suppression("WSREP: discarding established.*"); +CALL mtr.add_suppression("WSREP: discarding established"); connection node_1; connection node_2; connection node_2; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/r/galera_sr_kill_all_norecovery.result mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_kill_all_norecovery.result --- mariadb-10.11.11/mysql-test/suite/galera_sr/r/galera_sr_kill_all_norecovery.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_kill_all_norecovery.result 2025-05-19 16:14:24.000000000 +0000 @@ -2,6 +2,7 @@ connection node_1; connection node_1; connection node_2; +connection node_1; CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; SET SESSION wsrep_trx_fragment_size = 1; SET AUTOCOMMIT=OFF; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/r/galera_sr_myisam.result mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_myisam.result --- mariadb-10.11.11/mysql-test/suite/galera_sr/r/galera_sr_myisam.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/r/galera_sr_myisam.result 2025-05-19 16:14:24.000000000 +0000 @@ -14,3 +14,4 @@ 1 DROP TABLE t1; connection node_1; +SET GLOBAL wsrep_mode = DEFAULT; diff -Nru 
mariadb-10.11.11/mysql-test/suite/galera_sr/r/mysql-wsrep-features#148.result mariadb-10.11.13/mysql-test/suite/galera_sr/r/mysql-wsrep-features#148.result --- mariadb-10.11.11/mysql-test/suite/galera_sr/r/mysql-wsrep-features#148.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/r/mysql-wsrep-features#148.result 2025-05-19 16:14:24.000000000 +0000 @@ -25,7 +25,7 @@ connection node_1; Got one of the listed errors connection node_2; -SET GLOBAL wsrep_slave_threads = 1; +SET GLOBAL wsrep_slave_threads = DEFAULT; SET GLOBAL debug_dbug = ''; SET DEBUG_SYNC='now SIGNAL signal.wsrep_apply_cb'; SET DEBUG_SYNC='now SIGNAL signal.wsrep_apply_cb'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/suite.pm mariadb-10.11.13/mysql-test/suite/galera_sr/suite.pm --- mariadb-10.11.11/mysql-test/suite/galera_sr/suite.pm 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/suite.pm 2025-05-19 16:14:24.000000000 +0000 @@ -9,62 +9,64 @@ push @::global_suppressions, ( - qr(WSREP: wsrep_sst_receive_address is set to '127.0.0.1), - qr(WSREP: Could not open saved state file for reading: .*), - qr(WSREP: Could not open state file for reading: .*), - qr(WSREP: Gap in state sequence. Need state transfer.), + qr(WSREP: wsrep_sst_receive_address is set to '127\.0\.0\.1), + qr(WSREP: Could not open saved state file for reading: ), + qr(WSREP: Could not open state file for reading: ), + qr(WSREP: Gap in state sequence\. Need state transfer\.), qr(WSREP: Failed to prepare for incremental state transfer:), - qr(WSREP:.*down context.*), + qr(WSREP: .*down context.*), qr(WSREP: Failed to send state UUID:), - qr(WSREP: last inactive check more than .* skipping check), - qr(WSREP: Releasing seqno [0-9]* before [0-9]* was assigned.), - qr|WSREP: access file\(.*gvwstate.dat\) failed\(No such file or directory\)|, + qr(WSREP: last inactive check more than .+ skipping check), + qr(WSREP: Releasing seqno [0-9]+ before [0-9]+ was assigned\.), + qr|WSREP: access file\(.*gvwstate.dat\) failed ?\(No such file or directory\)|, qr(WSREP: Quorum: No node with complete state), qr(WSREP: Initial position was provided by configuration or SST, avoiding override), - qr|WSREP: discarding established \(time wait\) .*|, - qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside. Will use that one.), + qr|WSREP: discarding established \(time wait\) |, + qr(WSREP: There are no nodes in the same segment that will ever be able to become donors, yet there is a suitable donor outside\. 
Will use that one\.), qr(WSREP: evs::proto.*), - qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:.*|, + qr|WSREP: Ignoring possible split-brain \(allowed by configuration\) from view:|, qr(WSREP: no nodes coming from prim view, prim not possible), - qr(WSREP: Member .* requested state transfer from .* but it is impossible to select State Transfer donor: Resource temporarily unavailable), + qr(WSREP: Member .+ ?requested state transfer from .+ but it is impossible to select State Transfer donor: Resource temporarily unavailable), qr(WSREP: user message in state LEAVING), - qr(WSREP: .* sending install message failed: Transport endpoint is not connected), + qr(WSREP: .* sending install message failed: (Transport endpoint|Socket) is not connected), qr(WSREP: .* sending install message failed: Resource temporarily unavailable), - qr(WSREP: Maximum writeset size exceeded by .*), - qr(WSREP: transaction size exceeded.*), - qr(WSREP: RBR event .*), - qr(WSREP: Ignoring error for TO isolated action: .*), - qr(WSREP: transaction size limit .*), - qr(WSREP: rbr write fail, .*), - qr(WSREP: .*Backend not supported: foo.*), - qr(WSREP: .*Failed to initialize backend using .*), - qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at .*), + qr(WSREP: Maximum writeset size exceeded by ), + qr(WSREP: transaction size exceeded), + qr(WSREP: RBR event ), + qr(WSREP: Ignoring error for TO isolated action: ), + qr(WSREP: transaction size limit ), + qr(WSREP: rbr write fail, ), + qr(WSREP: .*Backend not supported: foo), + qr(WSREP: .*Failed to initialize backend using ), + qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at ), qr(WSREP: gcs connect failed: Socket type not supported), qr(WSREP: failed to open gcomm backend connection: 110: failed to reach primary view: 110 .*), - qr(WSREP: .*Failed to open backend connection: -110 .*), - qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at .*), + qr(WSREP: .*Failed to open backend connection: -110 ), + qr(WSREP: .*Failed to open channel 'my_wsrep_cluster' at ), qr(WSREP: gcs connect failed: Connection timed out), qr|WSREP: wsrep::connect\(.*\) failed: 7|, - qr(WSREP: SYNC message from member .* in non-primary configuration. Ignored.), + qr(WSREP: SYNC message from member .+ ?in non-primary configuration\. Ignored\.), qr(WSREP: Could not find peer:), - qr(WSREP: TO isolation failed for: .*), - qr|WSREP: gcs_caused\(\) returned .*|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(SYNCED\). Message ignored.|, - qr|WSREP: Protocol violation. JOIN message sender .* is not in state transfer \(JOINED\). Message ignored.|, - qr|WSREP: Unsupported protocol downgrade: incremental data collection disabled. Expect abort.|, + qr(WSREP: TO isolation failed for: ), + qr|WSREP: gcs_caused\(\) returned |, + qr|WSREP: Protocol violation\. JOIN message sender .+ ?is not in state transfer \(SYNCED\)\. Message ignored\.|, + qr|WSREP: Protocol violation\. JOIN message sender .+ ?is not in state transfer \(JOINED\)\. Message ignored\.|, + qr|WSREP: Unsupported protocol downgrade: incremental data collection disabled\. Expect abort\.|, qr(WSREP: Action message in non-primary configuration from member [0-9]*), qr(WSREP: Last Applied Action message in non-primary configuration from member [0-9]*), - qr(WSREP: discarding established .*), - qr|WSREP: .*core_handle_uuid_msg.*|, - qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0. 
WSREP_SYNC_WAIT_BEFORE_READ is on), - qr|WSREP: JOIN message from member .* in non-primary configuration. Ignored.|, - qr|Query apply failed:*|, - qr(WSREP: Ignoring error*), - qr(WSREP: Failed to remove page file .*), - qr(WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to .*), - qr|WSREP: Sending JOIN failed: -107 \(Transport endpoint is not connected\). Will retry in new primary component.|, + qr(WSREP: discarding established ), + qr|WSREP: .*core_handle_uuid_msg|, + qr(WSREP: --wsrep-causal-reads=ON takes precedence over --wsrep-sync-wait=0\. WSREP_SYNC_WAIT_BEFORE_READ is on), + qr|WSREP: JOIN message from member .+ ?in non-primary configuration\. Ignored\.|, + qr|WSREP: .*Query apply failed:|, + qr(WSREP: Ignoring error), + qr(WSREP: Failed to remove page file ), + qr(WSREP: wsrep_sst_method is set to 'mysqldump' yet mysqld bind_address is set to ), + qr+WSREP: Sending JOIN failed: -107 \((Transport endpoint|Socket) is not connected\)\. Will retry in new primary component\.+, + qr+WSREP: Send action \{.* STATE_REQUEST\} returned -107 \((Transport endpoint|Socket) is not connected\)+, qr|WSREP: Trying to continue unpaused monitor|, qr|WSREP: Wait for gtid returned error 3 while waiting for prior transactions to commit before setting position|, + qr|WSREP: Failed to report last committed|, ); bless { }; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/MDEV-27615.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/MDEV-27615.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/MDEV-27615.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/MDEV-27615.test 2025-05-19 16:14:24.000000000 +0000 @@ -69,5 +69,4 @@ --disconnect node_2 --connect node_2, 127.0.0.1, root, , test, $NODE_MYPORT_2 - --source suite/galera/include/auto_increment_offset_restore.inc diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/MDEV-28971.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/MDEV-28971.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/MDEV-28971.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/MDEV-28971.test 2025-05-19 16:14:24.000000000 +0000 @@ -4,6 +4,7 @@ # --source include/galera_cluster.inc +--source include/have_sequence.inc CREATE SEQUENCE SEQ NOCACHE ENGINE=InnoDB; SET SESSION wsrep_trx_fragment_size=1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/MENT-2042.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/MENT-2042.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/MENT-2042.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/MENT-2042.test 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,23 @@ +# +# MENT-2042 Assertion `bf_aborted()' failed in wsrep::transaction::xa_replay_common() +# + +--source include/galera_cluster.inc +--source include/have_debug_sync.inc + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 + +--connection node_1 +--let connection_id = `SELECT CONNECTION_ID()` + +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); + +# +# Attempt an XA transaction; XA is not yet supported with Galera replication +# + +--error ER_NOT_SUPPORTED_YET +XA START 'a'; + +DROP TABLE t1; +--disconnect node_1a diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_cc_master.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_cc_master.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_cc_master.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_cc_master.test 2025-05-19
16:14:24.000000000 +0000 @@ -7,7 +7,7 @@ # leave the cluster. # -CALL mtr.add_suppression("WSREP: discarding established.*"); +CALL mtr.add_suppression("WSREP: discarding established"); # Save original auto_increment_offset values. --let $node_1=node_1 diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_gtid-master.opt mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_gtid-master.opt --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_gtid-master.opt 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_gtid-master.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1 +1 @@ - --log-bin --log-slave-updates --loose-galera-sr-gtid-unique +--log-bin --log-slave-updates --loose-galera-sr-gtid-unique diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.cnf mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.cnf --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,8 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.recovery=false' +wsrep_provider_options='pc.recovery=false;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' + auto_increment_offset=1 [mysqld.2] diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_all_norecovery.test 2025-05-19 16:14:24.000000000 +0000 @@ -11,6 +11,8 @@ --let $node_2=node_2 --source ../../galera/include/auto_increment_offset_save.inc +--connection node_1 + CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB; SET SESSION wsrep_trx_fragment_size = 1; SET AUTOCOMMIT=OFF; @@ -26,7 +28,6 @@ --let $wait_condition = SELECT COUNT(*) > 0 FROM mysql.wsrep_streaming_log; --source include/wait_condition.inc - # # Kill the entire cluster and restart # diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_kill_slave.cnf mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_slave.cnf --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_kill_slave.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_kill_slave.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,4 @@ !include ../galera_2nodes.cnf [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;pc.weight=2' +wsrep_provider_options='pc.weight=2;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_myisam.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_myisam.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_myisam.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_myisam.test 2025-05-19 16:14:24.000000000 +0000 @@ -22,6 +22,4 @@ DROP TABLE t1; --connection node_1 
---disable_query_log SET GLOBAL wsrep_mode = DEFAULT; ---enable_query_log diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.cnf mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.cnf --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -4,8 +4,7 @@ # causes the first MTR connection to be forcefully dropped by Galera, which in turn confuses MTR [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=1;pc.ignore_sb=true' +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' [mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=1;pc.ignore_sb=true' - +wsrep_provider_options='pc.ignore_sb=true;repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=10M' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_mysqldump_sst.test 2025-05-19 16:14:24.000000000 +0000 @@ -85,4 +85,3 @@ # Restore original auto_increment_offset values. --source ../galera/include/auto_increment_offset_restore.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_shutdown_slave.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_shutdown_slave.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_shutdown_slave.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_shutdown_slave.test 2025-05-19 16:14:24.000000000 +0000 @@ -8,6 +8,7 @@ --let $node_1=node_1 --let $node_2=node_2 --source ../galera/include/auto_increment_offset_save.inc + --connection node_2 call mtr.add_suppression("WSREP: Failed to scan the last segment to the end\\. Last events may be missing\\.
Last recovered event: "); diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_small_gcache.cnf mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_small_gcache.cnf --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/galera_sr_small_gcache.cnf 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/galera_sr_small_gcache.cnf 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,7 @@ !include ../galera_2nodes.cnf + [mysqld.1] -wsrep_provider_options='base_port=@mysqld.1.#galera_port;gcache.size=16K' -[mysqld.2] -wsrep_provider_options='base_port=@mysqld.2.#galera_port;gcache.size=16K' +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.1.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=16K' +[mysqld.2] +wsrep_provider_options='repl.causal_read_timeout=PT90S;base_port=@mysqld.2.#galera_port;evs.suspect_timeout=PT10S;evs.inactive_timeout=PT30S;evs.install_timeout=PT15S;pc.wait_prim_timeout=PT60S;gcache.size=16K' diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#14.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#14.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#14.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#14.test 2025-05-19 16:14:24.000000000 +0000 @@ -18,4 +18,3 @@ --connection node_2 --source include/galera_wait_ready.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#148.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#148.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#148.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#148.test 2025-05-19 16:14:24.000000000 +0000 @@ -47,7 +47,7 @@ --reap --connection node_2 -SET GLOBAL wsrep_slave_threads = 1; +SET GLOBAL wsrep_slave_threads = DEFAULT; SET GLOBAL debug_dbug = ''; SET DEBUG_SYNC='now SIGNAL signal.wsrep_apply_cb'; SET DEBUG_SYNC='now SIGNAL signal.wsrep_apply_cb'; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#22.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#22.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#22.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#22.test 2025-05-19 16:14:24.000000000 +0000 @@ -40,7 +40,6 @@ --connection node_1 SELECT COUNT(*) = 6 FROM t1; - --connection node_2 SELECT COUNT(*) = 6 FROM t1; diff -Nru mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#96.test mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#96.test --- mariadb-10.11.11/mysql-test/suite/galera_sr/t/mysql-wsrep-features#96.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/galera_sr/t/mysql-wsrep-features#96.test 2025-05-19 16:14:24.000000000 +0000 @@ -39,7 +39,3 @@ --connection node_1 DROP TABLE t1; DROP TABLE t2; - - - - diff -Nru mariadb-10.11.11/mysql-test/suite/gcol/r/innodb_virtual_basic.result mariadb-10.11.13/mysql-test/suite/gcol/r/innodb_virtual_basic.result --- mariadb-10.11.11/mysql-test/suite/gcol/r/innodb_virtual_basic.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/gcol/r/innodb_virtual_basic.result 2025-05-19 16:14:24.000000000 +0000 @@ -86,6 +86,8 @@ DROP INDEX 
idx1 ON t; DROP INDEX idx2 ON t; DROP TABLE t; +# restart +set default_storage_engine=innodb; /* Test large BLOB data */ CREATE TABLE `t` ( `a` BLOB, diff -Nru mariadb-10.11.11/mysql-test/suite/gcol/r/innodb_virtual_stats.result mariadb-10.11.13/mysql-test/suite/gcol/r/innodb_virtual_stats.result --- mariadb-10.11.11/mysql-test/suite/gcol/r/innodb_virtual_stats.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/gcol/r/innodb_virtual_stats.result 2025-05-19 16:14:24.000000000 +0000 @@ -38,6 +38,10 @@ idxa n_diff_pfx02 a,DB_ROW_ID idxa n_leaf_pages Number of leaf pages in the index idxa size Number of pages in the index +idxb n_diff_pfx01 b +idxb n_diff_pfx02 b,DB_ROW_ID +idxb n_leaf_pages Number of leaf pages in the index +idxb size Number of pages in the index vidxcd n_diff_pfx01 c vidxcd n_diff_pfx02 c,d vidxcd n_diff_pfx03 c,d,DB_ROW_ID @@ -54,6 +58,14 @@ GEN_CLUST_INDEX n_diff_pfx01 DB_ROW_ID GEN_CLUST_INDEX n_leaf_pages Number of leaf pages in the index GEN_CLUST_INDEX size Number of pages in the index +idxb n_diff_pfx01 b +idxb n_diff_pfx02 b,DB_ROW_ID +idxb n_leaf_pages Number of leaf pages in the index +idxb size Number of pages in the index +vidxcd n_diff_pfx01 d +vidxcd n_diff_pfx02 d,DB_ROW_ID +vidxcd n_leaf_pages Number of leaf pages in the index +vidxcd size Number of pages in the index ALTER TABLE t ADD INDEX vidxe (e), ALGORITHM=INPLACE; select count(*) from t; count(*) @@ -65,6 +77,18 @@ GEN_CLUST_INDEX n_diff_pfx01 DB_ROW_ID GEN_CLUST_INDEX n_leaf_pages Number of leaf pages in the index GEN_CLUST_INDEX size Number of pages in the index +idxb n_diff_pfx01 b +idxb n_diff_pfx02 b,DB_ROW_ID +idxb n_leaf_pages Number of leaf pages in the index +idxb size Number of pages in the index +vidxcd n_diff_pfx01 d +vidxcd n_diff_pfx02 d,DB_ROW_ID +vidxcd n_leaf_pages Number of leaf pages in the index +vidxcd size Number of pages in the index +vidxe n_diff_pfx01 e +vidxe n_diff_pfx02 e,DB_ROW_ID +vidxe n_leaf_pages Number of leaf pages in the index +vidxe size Number of pages in the index ALTER TABLE t ADD COLUMN f INT GENERATED ALWAYS AS(a + a), ADD INDEX vidxf (f), ALGORITHM=INPLACE; select count(*) from t; count(*) @@ -76,6 +100,22 @@ GEN_CLUST_INDEX n_diff_pfx01 DB_ROW_ID GEN_CLUST_INDEX n_leaf_pages Number of leaf pages in the index GEN_CLUST_INDEX size Number of pages in the index +idxb n_diff_pfx01 b +idxb n_diff_pfx02 b,DB_ROW_ID +idxb n_leaf_pages Number of leaf pages in the index +idxb size Number of pages in the index +vidxcd n_diff_pfx01 d +vidxcd n_diff_pfx02 d,DB_ROW_ID +vidxcd n_leaf_pages Number of leaf pages in the index +vidxcd size Number of pages in the index +vidxe n_diff_pfx01 e +vidxe n_diff_pfx02 e,DB_ROW_ID +vidxe n_leaf_pages Number of leaf pages in the index +vidxe size Number of pages in the index +vidxf n_diff_pfx01 f +vidxf n_diff_pfx02 f,DB_ROW_ID +vidxf n_leaf_pages Number of leaf pages in the index +vidxf size Number of pages in the index ALTER TABLE t DROP INDEX vidxcd; SELECT index_name, stat_name, stat_description FROM mysql.innodb_index_stats @@ -84,4 +124,16 @@ GEN_CLUST_INDEX n_diff_pfx01 DB_ROW_ID GEN_CLUST_INDEX n_leaf_pages Number of leaf pages in the index GEN_CLUST_INDEX size Number of pages in the index +idxb n_diff_pfx01 b +idxb n_diff_pfx02 b,DB_ROW_ID +idxb n_leaf_pages Number of leaf pages in the index +idxb size Number of pages in the index +vidxe n_diff_pfx01 e +vidxe n_diff_pfx02 e,DB_ROW_ID +vidxe n_leaf_pages Number of leaf pages in the index +vidxe size Number of pages in the index +vidxf n_diff_pfx01 f 
+vidxf n_diff_pfx02 f,DB_ROW_ID +vidxf n_leaf_pages Number of leaf pages in the index +vidxf size Number of pages in the index DROP TABLE t; diff -Nru mariadb-10.11.11/mysql-test/suite/gcol/t/innodb_virtual_basic.test mariadb-10.11.13/mysql-test/suite/gcol/t/innodb_virtual_basic.test --- mariadb-10.11.11/mysql-test/suite/gcol/t/innodb_virtual_basic.test 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/gcol/t/innodb_virtual_basic.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ --source include/have_innodb.inc --source include/have_partition.inc ---source include/big_test.inc +--source include/not_embedded.inc call mtr.add_suppression("\\[Warning\\] InnoDB: Compute virtual"); @@ -66,6 +66,41 @@ DROP INDEX idx2 ON t; DROP TABLE t; +let MYSQLD_DATADIR=`select @@datadir`; +let PAGE_SIZE=`select @@innodb_page_size`; +--source include/shutdown_mysqld.inc +perl; +do "$ENV{MTR_SUITE_DIR}/../innodb/include/crc32.pl"; +my $file = "$ENV{MYSQLD_DATADIR}/ibdata1"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +my $ps= $ENV{PAGE_SIZE}; +my $page; +die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps; +my $full_crc32 = unpack("N",substr($page,54,4)) & 0x10; # FIL_SPACE_FLAGS +sysseek(FILE, 7*$ps, 0) || die "Unable to seek $file\n"; +die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps; +substr($page,54,4)=pack("N",0xc001cafe); # 32 MSB of 64-bit DICT_HDR_INDEX_ID +my $polynomial = 0x82f63b78; # CRC-32C +if ($full_crc32) +{ + my $ck = mycrc32(substr($page, 0, $ps-4), 0, $polynomial); + substr($page, $ps-4, 4) = pack("N", $ck); +} +else +{ + my $ck= pack("N",mycrc32(substr($page, 4, 22), 0, $polynomial) ^ + mycrc32(substr($page, 38, $ps - 38 - 8), 0, $polynomial)); + substr($page,0,4)=$ck; + substr($page,$ps-8,4)=$ck; +} +sysseek(FILE, 7*$ps, 0) || die "Unable to rewind $file\n"; +syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n"; +close(FILE) || die "Unable to close $file"; +EOF +--source include/start_mysqld.inc +set default_storage_engine=innodb; + /* Test large BLOB data */ CREATE TABLE `t` ( `a` BLOB, diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/alter_copy_bulk.result mariadb-10.11.13/mysql-test/suite/innodb/r/alter_copy_bulk.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/alter_copy_bulk.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/alter_copy_bulk.result 2025-05-19 16:14:24.000000000 +0000 @@ -91,3 +91,24 @@ ALTER TABLE t1 FORCE, ALGORITHM=COPY; DROP TABLE t1; SET GLOBAL innodb_stats_persistent=@default_stats_persistent; +# +# MDEV-36504 Memory leak after insert into empty table +# +CREATE TABLE t1 (k INT PRIMARY KEY)ENGINE=InnoDB; +INSERT INTO t1 SET k= 1; +START TRANSACTION; +INSERT INTO t1 SET k= 2; +SELECT COUNT(*) > 0 FROM mysql.innodb_index_stats LOCK IN SHARE MODE; +COUNT(*) > 0 +1 +connect con1,localhost,root,,,; +SET innodb_lock_wait_timeout=0; +CREATE TABLE t2(f1 INT DEFAULT 1 PRIMARY KEY) +STATS_PERSISTENT= 1 ENGINE=InnoDB as SELECT k FROM t1; +ERROR HY000: Lock wait timeout exceeded; try restarting transaction +disconnect con1; +connection default; +SET innodb_lock_wait_timeout=default; +DROP TABLE t1; +DROP TABLE IF EXISTS t2; +# restart diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/alter_partitioned_debug.result mariadb-10.11.13/mysql-test/suite/innodb/r/alter_partitioned_debug.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/alter_partitioned_debug.result 2025-01-30 11:01:23.000000000 +0000 +++ 
mariadb-10.11.13/mysql-test/suite/innodb/r/alter_partitioned_debug.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ CREATE TABLE t1 (a INT, b VARCHAR(10)) ENGINE=InnoDB +STATS_PERSISTENT=1 STATS_AUTO_RECALC=0 PARTITION BY RANGE(a) (PARTITION pa VALUES LESS THAN (3), PARTITION pb VALUES LESS THAN (5)); @@ -19,9 +20,30 @@ ERROR 23000: Duplicate entry '2-two' for key 'a' connection default; DELETE FROM t1; -disconnect ddl; SET DEBUG_SYNC = 'RESET'; CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK -DROP TABLE t1; +CREATE TABLE t(a INT, b VARCHAR(10)) ENGINE=InnoDB +STATS_PERSISTENT=1 STATS_AUTO_RECALC=1; +RENAME TABLE t TO u; +DELETE FROM mysql.innodb_table_stats WHERE table_name='u'; +DELETE FROM mysql.innodb_index_stats WHERE table_name='u'; +SET STATEMENT debug_dbug='+d,dict_stats_save_exit_notify_and_wait' FOR +SELECT * FROM u; +connection ddl; +SET DEBUG_SYNC='open_tables_after_open_and_process_table +WAIT_FOR dict_stats_save_finished'; +ALTER TABLE t1 EXCHANGE PARTITION pb WITH TABLE u; +connect sync,localhost,root; +SET DEBUG_SYNC='now SIGNAL dict_stats_save_unblock'; +disconnect sync; +connection default; +a b +connection ddl; +disconnect ddl; +connection default; +SELECT * FROM u; +a b +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t1,u; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/autoinc_persist,desc.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/autoinc_persist,desc.rdiff --- mariadb-10.11.11/mysql-test/suite/innodb/r/autoinc_persist,desc.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/autoinc_persist,desc.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,6 @@ -@@ -13,212 +13,212 @@ +--- autoinc_persist.result ++++ autoinc_persist.result,desc +@@ -13,224 +13,224 @@ # # Pre-create several tables SET SQL_MODE='STRICT_ALL_TABLES'; @@ -296,8 +298,7 @@ +2 +1 +CREATE TABLE t11(a FLOAT AUTO_INCREMENT, PRIMARY KEY(a DESC)) ENGINE = InnoDB; - INSERT INTO t11 VALUES(0), (0), (0), (0), (-1), (-10), (0), - (20), (30), (31); + INSERT INTO t11 VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31); SELECT * FROM t11; a --10 @@ -310,7 +311,7 @@ -20 -30 31 --CREATE TABLE t12(a DOUBLE AUTO_INCREMENT KEY) ENGINE = InnoDB; +-CREATE TABLE t11u(a FLOAT UNSIGNED AUTO_INCREMENT KEY) ENGINE = InnoDB; +30 +20 +5 @@ -320,9 +321,30 @@ +1 +-1 +-10 ++CREATE TABLE t11u(a FLOAT UNSIGNED AUTO_INCREMENT, PRIMARY KEY(a DESC)) ENGINE = InnoDB; + INSERT INTO t11u VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31); + ERROR 22003: Out of range value for column 'a' at row 5 + INSERT INTO t11u VALUES(0), (0), (0), (0), (0), (20), (30), (31); + SELECT * FROM t11u; + a +-11 +-12 +-13 +-14 +-15 +-20 +-30 + 31 +-CREATE TABLE t12(a DOUBLE AUTO_INCREMENT KEY) ENGINE = InnoDB; ++30 ++20 ++15 ++14 ++13 ++12 ++11 +CREATE TABLE t12(a DOUBLE AUTO_INCREMENT, PRIMARY KEY(a DESC)) ENGINE = InnoDB; - INSERT INTO t12 VALUES(0), (0), (0), (0), (-1), (-10), (0), - (20), (30), (31); + INSERT INTO t12 VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31); SELECT * FROM t12; a --10 @@ -344,10 +366,10 @@ +1 +-1 +-10 - # Scenario 1: Normal restart, to test if the counters are persisted - # Scenario 2: Delete some values, to test the counters should not be the - # one which is the largest in current table -@@ -242,14 +242,14 @@ + CREATE TABLE t12u(a DOUBLE UNSIGNED AUTO_INCREMENT KEY) ENGINE = InnoDB; + INSERT INTO t12u VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31); + ERROR 22003: Out of range value for column 'a' at row 5 +@@ -268,14 
+268,14 @@ SELECT MAX(a) AS `Expect 100000000000` FROM t9; Expect 100000000000 100000000000 @@ -364,7 +386,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=1234 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci INSERT INTO t13 VALUES(0); SELECT a AS `Expect 1234` FROM t13; -@@ -464,28 +464,28 @@ +@@ -490,28 +490,28 @@ INSERT INTO t1 VALUES(0), (0); SELECT * FROM t1; a @@ -398,7 +420,7 @@ # Ensure that all changes before the server is killed are persisted. set global innodb_flush_log_at_trx_commit=1; TRUNCATE TABLE t1; -@@ -498,63 +498,63 @@ +@@ -524,63 +524,63 @@ INSERT INTO t19 VALUES(0), (0); SELECT * FROM t19; a @@ -481,7 +503,7 @@ DELETE FROM t3 WHERE a > 300; SELECT MAX(a) AS `Expect 200` FROM t3; Expect 200 -@@ -566,7 +566,7 @@ +@@ -592,7 +592,7 @@ Table Create Table t3 CREATE TABLE `t3` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -490,7 +512,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=201 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci INSERT INTO t3 VALUES(0); SELECT MAX(a) AS `Expect 201` FROM t3; -@@ -579,7 +579,7 @@ +@@ -605,7 +605,7 @@ Table Create Table t3 CREATE TABLE `t3` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -499,7 +521,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=500 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci INSERT INTO t3 VALUES(0); SELECT MAX(a) AS `Expect 500` FROM t3; -@@ -591,13 +591,13 @@ +@@ -617,13 +617,13 @@ Table Create Table t3 CREATE TABLE `t3` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -515,7 +537,7 @@ INSERT INTO t3 VALUES(150), (180); UPDATE t3 SET a = 200 WHERE a = 150; INSERT INTO t3 VALUES(220); -@@ -607,7 +607,7 @@ +@@ -633,7 +633,7 @@ Table Create Table t3 CREATE TABLE `t3` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -524,7 +546,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=221 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci INSERT INTO t3 VALUES(0); SELECT MAX(a) AS `Expect 221` FROM t3; -@@ -619,7 +619,7 @@ +@@ -645,7 +645,7 @@ Table Create Table t3 CREATE TABLE `t3` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -533,7 +555,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=120 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci # MDEV-6076: Test adding an AUTO_INCREMENT COLUMN CREATE TABLE mdev6076a (b INT) ENGINE=InnoDB; -@@ -669,18 +669,18 @@ +@@ -695,18 +695,18 @@ INSERT INTO t_inplace SELECT * FROM t3; SELECT * FROM t_inplace; a @@ -559,7 +581,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=211 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci # This will keep the autoinc counter ALTER TABLE t_inplace AUTO_INCREMENT = 250, ALGORITHM = INPLACE; -@@ -689,7 +689,7 @@ +@@ -715,7 +715,7 @@ Table Create Table t_inplace CREATE TABLE `t_inplace` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -568,7 +590,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=250 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci # This should keep the autoinc counter as well ALTER TABLE t_inplace ADD COLUMN b INT, ALGORITHM = INPLACE; -@@ -699,16 +699,16 @@ +@@ -725,16 +725,16 @@ t_inplace CREATE TABLE `t_inplace` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, `b` int(11) DEFAULT NULL, @@ -590,7 +612,7 @@ # This should reset the autoinc counter to the one specified # Since it's smaller than current one but bigger than existing # biggest counter in the table -@@ -719,7 +719,7 @@ +@@ -745,7 +745,7 @@ t_inplace CREATE TABLE `t_inplace` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, `b` int(11) DEFAULT NULL, @@ -599,7 +621,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=180 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci # This should reset the autoinc counter to the next value of # current max counter in the table, since the specified value -@@ -730,7 +730,7 @@ 
+@@ -756,7 +756,7 @@ Table Create Table t_inplace CREATE TABLE `t_inplace` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -608,7 +630,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=123 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci INSERT INTO t_inplace VALUES(0), (0); SELECT MAX(a) AS `Expect 124` FROM t_inplace; -@@ -757,18 +757,18 @@ +@@ -783,18 +783,18 @@ INSERT INTO t_copy SELECT * FROM t3; SELECT * FROM t_copy; a @@ -634,7 +656,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=211 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci # This will keep the autoinc counter ALTER TABLE t_copy AUTO_INCREMENT = 250, ALGORITHM = COPY; -@@ -777,7 +777,7 @@ +@@ -803,7 +803,7 @@ Table Create Table t_copy CREATE TABLE `t_copy` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -643,7 +665,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=250 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci # This should keep the autoinc counter as well ALTER TABLE t_copy ADD COLUMN b INT, ALGORITHM = COPY; -@@ -787,16 +787,16 @@ +@@ -813,16 +813,16 @@ t_copy CREATE TABLE `t_copy` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, `b` int(11) DEFAULT NULL, @@ -665,7 +687,7 @@ # This should reset the autoinc counter to the one specified # Since it's smaller than current one but bigger than existing # biggest counter in the table -@@ -807,7 +807,7 @@ +@@ -833,7 +833,7 @@ t_copy CREATE TABLE `t_copy` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, `b` int(11) DEFAULT NULL, @@ -674,7 +696,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=180 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci # This should reset the autoinc counter to the next value of # current max counter in the table, since the specified value -@@ -818,7 +818,7 @@ +@@ -844,7 +844,7 @@ Table Create Table t_copy CREATE TABLE `t_copy` ( `a` smallint(6) NOT NULL AUTO_INCREMENT, @@ -683,7 +705,7 @@ ) ENGINE=InnoDB AUTO_INCREMENT=123 DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci INSERT INTO t_copy VALUES(0), (0); SELECT MAX(a) AS `Expect 124` FROM t_copy; -@@ -842,7 +842,7 @@ +@@ -868,7 +868,7 @@ 126 DROP TABLE t_copy, it_copy; # Scenario 9: Test the sql_mode = NO_AUTO_VALUE_ON_ZERO @@ -692,7 +714,7 @@ set SQL_MODE = NO_AUTO_VALUE_ON_ZERO; INSERT INTO t30 VALUES(NULL, 1), (200, 2), (0, 3); INSERT INTO t30(b) VALUES(4), (5), (6), (7); -@@ -869,20 +869,20 @@ +@@ -895,20 +895,20 @@ set global innodb_flush_log_at_trx_commit=1; CREATE TABLE t31 (a INT) ENGINE = InnoDB; INSERT INTO t31 VALUES(1), (2); @@ -719,7 +741,7 @@ INSERT INTO t32 VALUES(0), (0); # Ensure that all changes before the server is killed are persisted. 
 set global innodb_flush_log_at_trx_commit=1;
-@@ -897,7 +897,7 @@
+@@ -923,7 +923,7 @@
 # increasing the counter
 CREATE TABLE t33 (
 a BIGINT NOT NULL PRIMARY KEY,
-@@ -920,13 +920,13 @@
+@@ -946,13 +946,13 @@
 INSERT INTO t31(a) VALUES(6), (0);
 SELECT * FROM t31;
 a b
-@@ -965,7 +965,7 @@
+@@ -991,7 +991,7 @@
 DROP TABLE t33;
 CREATE TABLE t33 (
 a BIGINT NOT NULL PRIMARY KEY,
-@@ -975,7 +975,7 @@
+@@ -1001,8 +1001,8 @@
 4
 SELECT * FROM t33;
 a b
@@ -766,4 +788,5 @@
 3 4
+2 2
+10 1
- DROP TABLE t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t30, t32, t33;
+ DROP TABLE t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t11u, t12u,
+ t30, t32, t33;
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/autoinc_persist.result mariadb-10.11.13/mysql-test/suite/innodb/r/autoinc_persist.result
--- mariadb-10.11.11/mysql-test/suite/innodb/r/autoinc_persist.result 2025-01-30 11:01:23.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/r/autoinc_persist.result 2025-05-19 16:14:24.000000000 +0000
@@ -190,8 +190,7 @@
 100000000000
 100000000006
 CREATE TABLE t11(a FLOAT AUTO_INCREMENT KEY) ENGINE = InnoDB;
-INSERT INTO t11 VALUES(0), (0), (0), (0), (-1), (-10), (0),
-(20), (30), (31);
+INSERT INTO t11 VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
 SELECT * FROM t11;
 a
 -10
@@ -204,9 +203,22 @@
 20
 30
 31
+CREATE TABLE t11u(a FLOAT UNSIGNED AUTO_INCREMENT KEY) ENGINE = InnoDB;
+INSERT INTO t11u VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
+ERROR 22003: Out of range value for column 'a' at row 5
+INSERT INTO t11u VALUES(0), (0), (0), (0), (0), (20), (30), (31);
+SELECT * FROM t11u;
+a
+11
+12
+13
+14
+15
+20
+30
+31
 CREATE TABLE t12(a DOUBLE AUTO_INCREMENT KEY) ENGINE = InnoDB;
-INSERT INTO t12 VALUES(0), (0), (0), (0), (-1), (-10), (0),
-(20), (30), (31);
+INSERT INTO t12 VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
 SELECT * FROM t12;
 a
 -10
@@ -219,6 +231,20 @@
 20
 30
 31
+CREATE TABLE t12u(a DOUBLE UNSIGNED AUTO_INCREMENT KEY) ENGINE = InnoDB;
+INSERT INTO t12u VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
+ERROR 22003: Out of range value for column 'a' at row 5
+INSERT INTO t12u VALUES(0), (0), (0), (0), (0), (20), (30), (31);
+SELECT * FROM t12u;
+a
+11
+12
+13
+14
+15
+20
+30
+31
 # Scenario 1: Normal restart, to test if the counters are persisted
 # Scenario 2: Delete some values, to test the counters should not be the
 # one which is the largest in current table
@@ -978,4 +1004,5 @@
 10 1
 2 2
 3 4
-DROP TABLE t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t30, t32, t33;
+DROP TABLE t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t11u, t12u,
+t30, t32, t33;
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/buf_pool_resize_oom.result mariadb-10.11.13/mysql-test/suite/innodb/r/buf_pool_resize_oom.result
--- mariadb-10.11.11/mysql-test/suite/innodb/r/buf_pool_resize_oom.result 2025-01-30 11:01:23.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/r/buf_pool_resize_oom.result 1970-01-01 00:00:00.000000000 +0000
@@ -1,8 +0,0 @@
-#
-# Bug #21348684 SIGABRT DURING RESIZING THE INNODB BUFFER POOL
-# ONLINE WITH MEMORY FULL CONDITION
-#
-call mtr.add_suppression("InnoDB: failed to allocate the chunk array");
-SET GLOBAL debug_dbug='+d,buf_pool_resize_chunk_null';
-SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + 1048576;
-# restart
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/doublewrite.result mariadb-10.11.13/mysql-test/suite/innodb/r/doublewrite.result
--- mariadb-10.11.11/mysql-test/suite/innodb/r/doublewrite.result 2025-01-30 11:01:23.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/r/doublewrite.result 2025-05-19 16:14:24.000000000 +0000
@@ -11,9 +11,11 @@
 commit work;
 SET GLOBAL innodb_fast_shutdown = 0;
 # restart
+SET GLOBAL innodb_max_dirty_pages_pct_lwm=0,innodb_max_dirty_pages_pct=0;
+SET GLOBAL innodb_max_dirty_pages_pct=99;
 connect dml,localhost,root,,;
 XA START 'x';
-insert into t1 values (6, repeat('%', @@innodb_page_size/2));
+insert into t1 values(6, repeat('%', @@innodb_page_size/2));
 XA END 'x';
 XA PREPARE 'x';
 disconnect dml;
@@ -23,7 +25,6 @@
 # restart
 FOUND 1 /InnoDB: Recovered page \[page id: space=[1-9][0-9]*, page number=0\]/ in mysqld.1.err
 # restart
-XA ROLLBACK 'x';
 check table t1;
 Table Op Msg_type Msg_text
 test.t1 check status OK
@@ -34,18 +35,13 @@
 3 ////////////
 4 ------------
 5 ............
-connect dml,localhost,root,,;
-XA START 'x';
-insert into t1 values (6, repeat('%', @@innodb_page_size/2));
-XA END 'x';
-XA PREPARE 'x';
-disconnect dml;
-connection default;
-flush table t1 for export;
+SET GLOBAL innodb_max_dirty_pages_pct_lwm=0,innodb_max_dirty_pages_pct=0;
+SET GLOBAL innodb_max_dirty_pages_pct=99;
+XA ROLLBACK 'x';
+FLUSH TABLE t1 FOR EXPORT;
 # Kill the server
 # restart
 FOUND 4 /InnoDB: Recovered page \[page id: space=[1-9][0-9]*, page number=[03]\]/ in mysqld.1.err
-XA ROLLBACK 'x';
 check table t1;
 Table Op Msg_type Msg_text
 test.t1 check status OK
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/foreign_key.result mariadb-10.11.13/mysql-test/suite/innodb/r/foreign_key.result
--- mariadb-10.11.11/mysql-test/suite/innodb/r/foreign_key.result 2025-01-30 11:01:23.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/r/foreign_key.result 2025-05-19 16:14:24.000000000 +0000
@@ -155,7 +155,6 @@
 FLUSH TABLES;
 # restart
 disconnect incomplete;
-SET @save_stats_persistent = @@GLOBAL.innodb_stats_persistent;
 SET GLOBAL innodb_stats_persistent = 0;
 INSERT INTO child SET a=0;
 INSERT INTO child SET a=1;
@@ -1182,6 +1181,25 @@
 ALTER TABLE t2 ADD KEY(b), ALGORITHM=NOCOPY;
 DELETE FROM t1;
 DROP TABLE t2, t1;
+#
+# MDEV-33167 ASAN errors after failing to load foreign key
+# relation for the table
+#
+call mtr.add_suppression("InnoDB: Load table `test`.`t3` failed, the table has missing foreign key indexes.
Turn off 'foreign_key_checks' and try again."); +SET STATEMENT FOREIGN_KEY_CHECKS = 0 FOR +CREATE TABLE t1(f1 VARCHAR(8), +FOREIGN KEY(f1) REFERENCES test.t3(f1))ENGINE=InnoDB; +SET STATEMENT FOREIGN_KEY_CHECKS = 0 FOR +CREATE TABLE t2(f1 VARCHAR(8), +FOREIGN KEY(f1) REFERENCES test.t3(f1)) +ENGINE=InnoDB DEFAULT CHARSET=utf8mb3; +SET STATEMENT FOREIGN_KEY_CHECKS = 0 FOR +CREATE TABLE t3(f1 VARCHAR(8) PRIMARY KEY) +ENGINE=InnoDB DEFAULT CHARSET=latin1; +set GLOBAL innodb_fast_shutdown=0; +# restart +ALTER TABLE t2 FORCE; +DROP TABLE t2, t1, t3; # End of 10.6 tests CREATE TABLE t1 ( @@ -1204,5 +1222,4 @@ ADD UNIQUE INDEX(f3); ERROR HY000: Cannot delete rows from table which is parent in a foreign key constraint 't1_ibfk_1' of table 't1' drop table t1, t2; -SET GLOBAL innodb_stats_persistent = @save_stats_persistent; # End of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb-index-online.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb-index-online.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb-index-online.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb-index-online.result 2025-05-19 16:14:24.000000000 +0000 @@ -534,7 +534,6 @@ ROLLBACK; SET DEBUG_SYNC = 'now SIGNAL inserted'; connection con1; -disconnect con1; connection default; SELECT * FROM t1; a b @@ -543,6 +542,31 @@ Table Op Msg_type Msg_text test.t1 check status OK DROP TABLE t1; +# +# MDEV-36281 DML aborts during online virtual index +# +CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY, f2 INT NOT NULL, +f3 INT NOT NULL, f4 INT AS (f3) VIRTUAL, +f5 INT AS (f1) VIRTUAL, INDEX(f4))ENGINE=InnoDB; +INSERT INTO t1(f1, f2, f3) VALUES(1, 2, 3); +SET DEBUG_SYNC = 'innodb_inplace_alter_table_enter SIGNAL dml_start WAIT_FOR dml_finish'; +ALTER TABLE t1 ADD INDEX v1(f5, f2, f4), ADD INDEX v2(f3, f5); +connection con1; +set DEBUG_SYNC="now WAIT_FOR dml_start"; +UPDATE t1 SET f3= f3 + 1; +set DEBUG_SYNC="now SIGNAL dml_finish"; +disconnect con1; +connection default; +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT f5, f2, f4 FROM t1 USE INDEX(v1); +f5 f2 f4 +1 2 4 +SELECT f3, f5 FROM t1 USE INDEX(v2); +f3 f5 +4 1 +DROP TABLE t1; SET DEBUG_SYNC = 'RESET'; SET GLOBAL innodb_file_per_table = @global_innodb_file_per_table_orig; SET GLOBAL innodb_monitor_enable = default; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,4 @@ -call mtr.add_suppression("InnoDB: Cannot allocate memory for the buffer pool"); +call mtr.add_suppression("InnoDB: Cannot map innodb_buffer_pool_size_max="); call mtr.add_suppression("InnoDB: Plugin initialization aborted at srv0start.cc.*"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); @@ -6,4 +6,4 @@ # MDEV-25019 memory allocation failures during startup cause server failure in different, confusing ways # # restart: --debug_dbug=+d,ib_buf_chunk_init_fails -FOUND 1 /\[ERROR\] InnoDB: Cannot allocate memory for the buffer pool/ in mysqld.1.err +FOUND 1 /\[ERROR\] InnoDB: Cannot map innodb_buffer_pool_size_max=16m/ in mysqld.1.err diff -Nru 
mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,32 +1,51 @@ +# +# MDEV-29445: Reorganize buffer pool (and remove chunks) +# set global innodb_adaptive_hash_index=ON; select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 8388608 +set global innodb_buffer_pool_size = 9437184; set global innodb_buffer_pool_size = 10485760; select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 10485760 -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; -set @`v_id` := 0; -set @`v_val` := 0; -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; -set global innodb_buffer_pool_size = 64 * 1024 * 1024 + 512 * 1024; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '67633152' -select @@innodb_buffer_pool_size; -@@innodb_buffer_pool_size -68157440 +create table t1 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED; +create table t2 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$kbs; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t2 SELECT seq*4,seq*4 FROM seq_1_to_16384; +set global innodb_buffer_pool_size = 7340032; select count(val) from t1; count(val) 262144 +select count(val) from t2; +count(val) +16384 set global innodb_adaptive_hash_index=OFF; -set global innodb_buffer_pool_size = 25165824; +set global innodb_buffer_pool_size = 24117248; +set global innodb_buffer_pool_size = 26214400; +Warnings: +Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '26214400' select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 25165824 select count(val) from t1; count(val) 262144 -drop table t1; -drop view view0; +select count(val) from t2; +count(val) +16384 +drop table t1,t2; +SET GLOBAL innodb_max_purge_lag_wait = 0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +SET GLOBAL innodb_buffer_pool_size = @old_innodb_buffer_pool_size; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -SET 
@save_size=@@innodb_buffer_pool_size; -# -# MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize -# after or during DROP TABLE -# -select @@innodb_buffer_pool_chunk_size; -@@innodb_buffer_pool_chunk_size -1048576 -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; -SET GLOBAL innodb_buffer_pool_size=256*1024*1024; -DROP TABLE t1; -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size; -# End of 10.6 tests -SET GLOBAL innodb_buffer_pool_size=@save_size; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result 2025-05-19 16:14:24.000000000 +0000 @@ -4,7 +4,32 @@ SET GLOBAL innodb_buffer_pool_size=16777216; CREATE TEMPORARY TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 SELECT seq FROM seq_1_to_200; +SET GLOBAL innodb_max_purge_lag_wait=0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status +connect con1,localhost,root; +SET DEBUG_SYNC='buf_pool_shrink_before_wakeup SIGNAL blocked WAIT_FOR go'; SET GLOBAL innodb_buffer_pool_size=8388608; +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status Withdrawing blocks. (505/505). 
+SET DEBUG_SYNC='now SIGNAL go'; +connection con1; +disconnect con1; +connection default; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status +SET DEBUG_SYNC=RESET; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; SELECT COUNT(*),MIN(a),MAX(a) FROM t1; COUNT(*) MIN(a) MAX(a) 200 1 200 diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -select @@innodb_buffer_pool_chunk_size; -@@innodb_buffer_pool_chunk_size -4194304 -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; -set @`v_id` := 0; -set @`v_val` := 0; -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; -set global innodb_buffer_pool_size = 7340032; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '7340032' -select count(val) from t1; -count(val) -262144 -set global innodb_buffer_pool_size = 16777216; -select count(val) from t1; -count(val) -262144 -drop table t1; -drop view view0; -set global innodb_buffer_pool_size = 2*1048576; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '2097152' -select @@innodb_buffer_pool_size; -@@innodb_buffer_pool_size -4194304 diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_bug52663.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_bug52663.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_bug52663.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_bug52663.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,10 +1,11 @@ +SET @save_innodb_timeout=@@innodb_lock_wait_timeout; +SET GLOBAL innodb_lock_wait_timeout=1; set session transaction isolation level read committed; create table innodb_bug52663 (what varchar(5), id integer, count integer, primary key (what, id)) engine=innodb; insert into innodb_bug52663 values ('total', 0, 0); begin; connect addconroot, localhost, root,,; -connection addconroot; set session transaction isolation level read committed; begin; connection default; @@ -31,3 +32,4 @@ what id count total 0 2 drop table innodb_bug52663; +SET GLOBAL innodb_lock_wait_timeout=@save_innodb_timeout; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_row_lock_time_ms.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_row_lock_time_ms.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_row_lock_time_ms.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_row_lock_time_ms.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,10 +1,18 @@ CREATE TABLE `t`(`id` INT, PRIMARY KEY(`id`)) ENGINE=InnoDB STATS_PERSISTENT=0; INSERT INTO t VALUES (1); -SET GLOBAL innodb_monitor_reset = "module_innodb"; +SET GLOBAL 
innodb_monitor_disable="lock_row_lock_time"; +SET GLOBAL innodb_monitor_disable="lock_row_lock_time_max"; +SET GLOBAL innodb_monitor_reset_all='lock_row_lock_time'; +SET GLOBAL innodb_monitor_reset_all='lock_row_lock_time_max'; +SET GLOBAL innodb_monitor_enable="lock_row_lock_time"; +SET GLOBAL innodb_monitor_enable="lock_row_lock_time_max"; BEGIN; SELECT * FROM t FOR UPDATE; id 1 +SELECT @innodb_row_lock_time_before := variable_value +FROM information_schema.global_status +WHERE LOWER(variable_name) = 'innodb_row_lock_time'; connect con1,localhost,root,,; SET innodb_lock_wait_timeout = 1; SELECT * FROM t FOR UPDATE; @@ -12,29 +20,27 @@ disconnect con1; connection default; COMMIT; -SELECT variable_value > 100 FROM information_schema.global_status +SELECT variable_value - @innodb_row_lock_time_before > 100 +FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time'; -variable_value > 100 +variable_value - @innodb_row_lock_time_before > 100 1 -SELECT variable_value > 100 FROM information_schema.global_status +SELECT variable_value > 100 +FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_max'; variable_value > 100 1 -SELECT variable_value > 100 FROM information_schema.global_status -WHERE LOWER(variable_name) = 'innodb_row_lock_time_avg'; -variable_value > 100 -1 -SELECT count_reset > 100 FROM INFORMATION_SCHEMA.INNODB_METRICS -WHERE NAME="lock_row_lock_time"; -count_reset > 100 -1 -SELECT count_reset > 100 FROM INFORMATION_SCHEMA.INNODB_METRICS -WHERE NAME="lock_row_lock_time_max"; +SELECT count_reset > 100 +FROM INFORMATION_SCHEMA.INNODB_METRICS +WHERE NAME='lock_row_lock_time'; count_reset > 100 1 -SELECT count_reset > 100 FROM INFORMATION_SCHEMA.INNODB_METRICS -WHERE NAME="lock_row_lock_time_avg"; +SELECT count_reset > 100 +FROM INFORMATION_SCHEMA.INNODB_METRICS +WHERE NAME='lock_row_lock_time_max'; count_reset > 100 1 DROP TABLE t; -SET GLOBAL innodb_monitor_reset=default; +SET GLOBAL innodb_monitor_enable=default; +SET GLOBAL innodb_monitor_disable=default; +SET GLOBAL innodb_monitor_reset_all=default; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_stats_auto_recalc_on_nonexistent.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_stats_auto_recalc_on_nonexistent.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_stats_auto_recalc_on_nonexistent.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_stats_auto_recalc_on_nonexistent.result 2025-05-19 16:14:24.000000000 +0000 @@ -5,13 +5,13 @@ SELECT COUNT(*) FROM mysql.innodb_index_stats WHERE table_name = 't'; COUNT(*) 3 SELECT * FROM t; -FLUSH TABLE t; DELETE FROM mysql.innodb_index_stats WHERE table_name = 't'; DELETE FROM mysql.innodb_table_stats WHERE table_name = 't'; SELECT COUNT(*) FROM mysql.innodb_table_stats WHERE table_name = 't'; COUNT(*) 0 SELECT COUNT(*) FROM mysql.innodb_index_stats WHERE table_name = 't'; COUNT(*) 0 +RENAME TABLE t TO tmp, tmp TO t; SELECT * FROM t; SELECT COUNT(*) FROM mysql.innodb_table_stats WHERE table_name = 't'; COUNT(*) 1 @@ -25,13 +25,13 @@ SELECT COUNT(*) FROM mysql.innodb_index_stats WHERE table_name = 't'; COUNT(*) 3 SELECT * FROM t; -FLUSH TABLE t; DELETE FROM mysql.innodb_index_stats WHERE table_name = 't'; DELETE FROM mysql.innodb_table_stats WHERE table_name = 't'; SELECT COUNT(*) FROM mysql.innodb_table_stats WHERE table_name = 't'; COUNT(*) 0 SELECT COUNT(*) FROM mysql.innodb_index_stats WHERE table_name = 't'; COUNT(*) 0 +RENAME TABLE t TO tmp, tmp 
TO t; SELECT * FROM t; SELECT COUNT(*) FROM mysql.innodb_table_stats WHERE table_name = 't'; COUNT(*) 1 @@ -45,13 +45,13 @@ SELECT COUNT(*) FROM mysql.innodb_index_stats WHERE table_name = 't'; COUNT(*) 3 SELECT * FROM t; -FLUSH TABLE t; DELETE FROM mysql.innodb_index_stats WHERE table_name = 't'; DELETE FROM mysql.innodb_table_stats WHERE table_name = 't'; SELECT COUNT(*) FROM mysql.innodb_table_stats WHERE table_name = 't'; COUNT(*) 0 SELECT COUNT(*) FROM mysql.innodb_index_stats WHERE table_name = 't'; COUNT(*) 0 +RENAME TABLE t TO tmp, tmp TO t; SELECT * FROM t; SELECT COUNT(*) FROM mysql.innodb_table_stats WHERE table_name = 't'; COUNT(*) 0 diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_stats_fetch.result mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_stats_fetch.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/innodb_stats_fetch.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/innodb_stats_fetch.result 2025-05-19 16:14:24.000000000 +0000 @@ -125,7 +125,7 @@ table_name = 'test_ps_fetch' AND index_name = 'idx' AND stat_name = 'n_diff_pfx02'; -FLUSH TABLE test_ps_fetch; +RENAME TABLE test_ps_fetch TO tmp, tmp TO test_ps_fetch; SELECT seq_in_index, column_name, cardinality FROM information_schema.statistics WHERE table_name = 'test_ps_fetch' ORDER BY index_name, seq_in_index; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/instant_alter_debug,redundant.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/instant_alter_debug,redundant.rdiff --- mariadb-10.11.11/mysql-test/suite/innodb/r/instant_alter_debug,redundant.rdiff 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/instant_alter_debug,redundant.rdiff 2025-05-19 16:14:24.000000000 +0000 @@ -1,8 +1,9 @@ -@@ -527,6 +527,6 @@ +@@ -576,7 +576,7 @@ FROM information_schema.global_status WHERE variable_name = 'innodb_instant_alter_column'; instants -37 +38 - SET GLOBAL innodb_stats_persistent = @save_stats_persistent; - # End of 10.6 tests + CREATE TABLE t1(f1 INT, f2 TEXT)ENGINE=InnoDB; + INSERT INTO t1 VALUES(1, 'a'); + ALTER TABLE t1 ADD COLUMN f3 TEXT FIRST; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/instant_alter_debug.result mariadb-10.11.13/mysql-test/suite/innodb/r/instant_alter_debug.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/instant_alter_debug.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/instant_alter_debug.result 2025-05-19 16:14:24.000000000 +0000 @@ -575,5 +575,16 @@ WHERE variable_name = 'innodb_instant_alter_column'; instants 37 +CREATE TABLE t1(f1 INT, f2 TEXT)ENGINE=InnoDB; +INSERT INTO t1 VALUES(1, 'a'); +ALTER TABLE t1 ADD COLUMN f3 TEXT FIRST; +SET STATEMENT DEBUG_DBUG="+d,instant_insert_fail" FOR +ALTER TABLE t1 DROP COLUMN f1; +ERROR HY000: Internal error: InnoDB: Insert into SYS_COLUMNS failed +ALTER TABLE t1 DROP COLUMN f1; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; SET GLOBAL innodb_stats_persistent = @save_stats_persistent; # End of 10.6 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/lock_isolation.result mariadb-10.11.13/mysql-test/suite/innodb/r/lock_isolation.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/lock_isolation.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/lock_isolation.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,3 +1,6 @@ +connect disable_purging,localhost,root; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; # # 
MDEV-26642 Weird SELECT view when a record is # modified to the same value by two transactions @@ -52,15 +55,17 @@ # MDEV-26643 Inconsistent behaviors of UPDATE under # READ UNCOMMITTED and READ COMMITTED isolation level # -CREATE TABLE t(a INT, b INT) ENGINE=InnoDB; +CREATE TABLE t(a INT, b INT) ENGINE=InnoDB STATS_PERSISTENT=0; INSERT INTO t VALUES(NULL, 1), (2, 2); SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; BEGIN; UPDATE t SET a = 10; connection consistent; SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; UPDATE t SET b = 20 WHERE a; connection default; +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; COMMIT; connection consistent; SELECT * FROM t; @@ -74,8 +79,10 @@ UPDATE t SET a = 10; connection consistent; SET TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; UPDATE t SET b = 20 WHERE a; connection default; +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; COMMIT; connection consistent; SELECT * FROM t; @@ -89,8 +96,10 @@ UPDATE t SET a = 10; connection con_weird; SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; UPDATE t SET b = 20 WHERE a; connection default; +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; SELECT * FROM t; a b 10 1 @@ -113,8 +122,10 @@ connection consistent; SET TRANSACTION ISOLATION LEVEL READ COMMITTED; BEGIN; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; UPDATE t SET b = 2 WHERE a; connection default; +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; UPDATE t SET a = 1; COMMIT; connection consistent; @@ -128,20 +139,25 @@ # # MDEV-33802 Weird read view after ROLLBACK of other transactions # -CREATE TABLE t(a INT PRIMARY KEY, b INT UNIQUE) ENGINE=InnoDB; -INSERT INTO t SET a=1; -BEGIN; -INSERT INTO t SET a=2; +CREATE TABLE t(a INT PRIMARY KEY, b INT UNIQUE) ENGINE=InnoDB STATS_PERSISTENT=0; connection consistent; START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; +INSERT INTO t SET a=1; +connection consistent; SAVEPOINT sp1; SELECT * FROM t FORCE INDEX (b) FOR UPDATE; ERROR HY000: Record has changed since last read in table 't' SAVEPOINT sp1; +connection default; +BEGIN; +INSERT INTO t SET a=2; connection con_weird; START TRANSACTION WITH CONSISTENT SNAPSHOT; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; SELECT * FROM t FORCE INDEX (b) FOR UPDATE; connection default; +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; ROLLBACK; connection con_weird; a b @@ -149,12 +165,74 @@ SELECT * FROM t FORCE INDEX (b) FOR UPDATE; a b 1 NULL +COMMIT; disconnect con_weird; connection consistent; SELECT * FROM t FORCE INDEX (b) FOR UPDATE; a b 1 NULL +COMMIT; +connection default; +TRUNCATE TABLE t; +# +# MDEV-36639 innodb_snapshot_isolation=1 gives error for not comitted row changes +# +INSERT INTO t VALUES (1,1),(2,2); +connection default; +# Case 1: Transaction A modifies a record, transaction B with snapshot +# isolation level is blocked by A, then A is committed. +# Expected behaviour: B gets ER_CHECKREAD. 
+BEGIN; +UPDATE t SET b=3 WHERE a = 1; +connection consistent; +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +SELECT * FROM t; +a b +1 1 +2 2 +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; +SELECT * FROM t WHERE a=1 FOR UPDATE; +connection default; +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; +COMMIT; +connection consistent; +ERROR HY000: Record has changed since last read in table 't' +# Case 2: Transaction A modifies a record, transaction B with snapshot +# isolation level is blocked by A, then A is rolled back. +# Expected behaviour: B continues execution. +connection default; +BEGIN; +UPDATE t SET b=4 WHERE a=1; +connection consistent; +BEGIN; +SELECT * FROM t; +a b +2 2 +1 3 +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; +SELECT * FROM t WHERE a=1 FOR UPDATE; +connection default; +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; +ROLLBACK; +connection consistent; +a b +1 3 +ROLLBACK; +# Case 3: Transaction B with snapshot isolation level started with +# consistent snapshot. Transaction A modifies a record and is committed. +# Both B tries to read modified by A record. +# Expected behavior: B gets ER_CHECKREAD. +connection consistent; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; +UPDATE t SET b=4 WHERE a=1; +connection consistent; +SELECT * FROM t WHERE a=1 FOR UPDATE; +ERROR HY000: Record has changed since last read in table 't' disconnect consistent; +disconnect disable_purging; connection default; +SET DEBUG_SYNC="RESET"; DROP TABLE t; # End of 10.6 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/lock_memory_debug.result mariadb-10.11.13/mysql-test/suite/innodb/r/lock_memory_debug.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/lock_memory_debug.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/lock_memory_debug.result 2025-05-19 16:14:24.000000000 +0000 @@ -5,7 +5,7 @@ CREATE TABLE t1 (col1 INT) ENGINE=InnoDB; INSERT INTO t1 VALUES (1),(2),(3),(4),(5); SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR -INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000; +INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g; ERROR HY000: The total number of locks exceeds the lock table size SELECT COUNT(*) FROM t1; COUNT(*) diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/log_upgrade_101_flags.result mariadb-10.11.13/mysql-test/suite/innodb/r/log_upgrade_101_flags.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/log_upgrade_101_flags.result 2025-01-30 11:01:23.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/log_upgrade_101_flags.result 2025-05-19 16:14:24.000000000 +0000 @@ -1,7 +1,7 @@ call mtr.add_suppression("InnoDB: The change buffer is corrupted"); call mtr.add_suppression("InnoDB: Tablespace size stored in header is 768 pages, but the sum of data file sizes is 384 pages"); call mtr.add_suppression("InnoDB: adjusting FSP_SPACE_FLAGS of file"); -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=10M +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN 
('YES', 'DEFAULT', 'ENABLED');
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/mem_pressure,32bit.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/mem_pressure,32bit.rdiff
--- mariadb-10.11.11/mysql-test/suite/innodb/r/mem_pressure,32bit.rdiff 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/r/mem_pressure,32bit.rdiff 2025-05-19 16:14:24.000000000 +0000
@@ -0,0 +1,11 @@
+--- mem_pressure.result
++++ mem_pressure,32bit.result
+@@ -11,7 +11,7 @@
+ @@GLOBAL.innodb_buffer_pool_size_auto_min,
+ @@GLOBAL.innodb_buffer_pool_size_max;
+ @@GLOBAL.innodb_buffer_pool_size @@GLOBAL.innodb_buffer_pool_size_auto_min @@GLOBAL.innodb_buffer_pool_size_max
+-17825792 16777216 25165824
++17825792 16777216 18874368
+ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+ SET GLOBAL innodb_limit_optimistic_insert_debug=2;
+ SET STATEMENT unique_checks=0, foreign_key_checks=0 FOR
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/mem_pressure.result mariadb-10.11.13/mysql-test/suite/innodb/r/mem_pressure.result
--- mariadb-10.11.11/mysql-test/suite/innodb/r/mem_pressure.result 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/r/mem_pressure.result 2025-05-19 16:14:24.000000000 +0000
@@ -4,23 +4,34 @@
 set @save_dbug=@@debug_dbug;
 set @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug;
 set GLOBAL innodb_max_purge_lag_wait=0;
+SET @innodb_buffer_pool_size= @@GLOBAL.innodb_buffer_pool_size;
+SET @innodb_buffer_pool_size_min= @@GLOBAL.innodb_buffer_pool_size_auto_min;
+SELECT
+@@GLOBAL.innodb_buffer_pool_size,
+@@GLOBAL.innodb_buffer_pool_size_auto_min,
+@@GLOBAL.innodb_buffer_pool_size_max;
+@@GLOBAL.innodb_buffer_pool_size @@GLOBAL.innodb_buffer_pool_size_auto_min @@GLOBAL.innodb_buffer_pool_size_max
+17825792 16777216 25165824
 CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
 SET GLOBAL innodb_limit_optimistic_insert_debug=2;
 SET STATEMENT unique_checks=0, foreign_key_checks=0 FOR
 INSERT INTO t1 SELECT * FROM seq_1_to_1000;
 SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit;
 DROP TABLE t1;
-SELECT CAST(VARIABLE_VALUE AS INTEGER) INTO @dirty_prev
-FROM INFORMATION_SCHEMA.GLOBAL_STATUS
-WHERE VARIABLE_NAME='Innodb_buffer_pool_pages_dirty';
-set debug_dbug="d,trigger_garbage_collection";
-SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size;
-FOUND 1 /[Mm]emory pressure.*/ in mysqld.1.err
-SELECT CAST(VARIABLE_VALUE AS INTEGER) < @dirty_prev AS LESS_DIRTY_IS_GOOD
-FROM INFORMATION_SCHEMA.GLOBAL_STATUS
-WHERE VARIABLE_NAME='Innodb_buffer_pool_pages_dirty';
-LESS_DIRTY_IS_GOOD
+SET STATEMENT debug_dbug="d,trigger_garbage_collection" FOR
+SET GLOBAL innodb_buffer_pool_size=@innodb_buffer_pool_size;
+FOUND 1 /Memory pressure event disregarded.*/ in mysqld.1.err
+SET STATEMENT debug_dbug="d,trigger_garbage_collection" FOR
+SET GLOBAL innodb_buffer_pool_size_auto_min=
+CAST(@innodb_buffer_pool_size/2 AS UNSIGNED),
+innodb_buffer_pool_size=@innodb_buffer_pool_size;
+Warnings:
+Warning 1292 Truncated incorrect innodb_buffer_pool_size_auto_min value: '8912896'
+select @@global.innodb_buffer_pool_size < @innodb_buffer_pool_size;
+@@global.innodb_buffer_pool_size < @innodb_buffer_pool_size
 1
-FOUND 1 /InnoDB: Memory pressure event freed.*/ in mysqld.1.err
+FOUND 1 /InnoDB: Memory pressure event shrunk.*/ in mysqld.1.err
 set debug_dbug=@save_dbug;
+SET GLOBAL innodb_buffer_pool_size= @innodb_buffer_pool_size;
+SET GLOBAL innodb_buffer_pool_size_auto_min=@innodb_buffer_pool_size_min;
 # End of 10.11 tests
diff -Nru
mariadb-10.11.11/mysql-test/suite/innodb/r/page_cleaner.result mariadb-10.11.13/mysql-test/suite/innodb/r/page_cleaner.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/page_cleaner.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/page_cleaner.result 2025-05-19 16:14:24.000000000 +0000 @@ -2,8 +2,21 @@ SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0; SET GLOBAL innodb_max_dirty_pages_pct=0.0; +CREATE TABLE t(a INT) ENGINE=InnoDB STATS_PERSISTENT=0; +connect prevent_purge,localhost,root; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; +SET GLOBAL innodb_max_purge_lag_wait=0; SET GLOBAL innodb_max_dirty_pages_pct=90.0; -CREATE TABLE t ENGINE=InnoDB SELECT * FROM seq_1_to_10000; +SELECT variable_value INTO @log_writes FROM information_schema.global_status +WHERE variable_name='innodb_log_writes'; +BEGIN; +ROLLBACK; +SELECT if(variable_value-@log_writes<500,'ok',variable_value-@log_writes) +FROM information_schema.global_status WHERE variable_name='innodb_log_writes'; +if(variable_value-@log_writes<500,'ok',variable_value-@log_writes) +ok +disconnect prevent_purge; SELECT variable_value>0 FROM information_schema.global_status WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; variable_value>0 diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/recovery_memory.result mariadb-10.11.13/mysql-test/suite/innodb/r/recovery_memory.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/recovery_memory.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/recovery_memory.result 2025-05-19 16:14:24.000000000 +0000 @@ -12,7 +12,7 @@ connect con1,localhost,root,,,; CALL dorepeat(); connection default; -# restart: --innodb_buffer_pool_size=5242880 +# restart: --innodb_buffer_pool_size=6m DROP TABLE t1; DROP PROCEDURE dorepeat; # diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/restart,16k.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/restart,16k.rdiff --- mariadb-10.11.11/mysql-test/suite/innodb/r/restart,16k.rdiff 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/restart,16k.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '5242879' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 5242880 for innodb_page_size=16384 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '5242879' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/restart,32k.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/restart,32k.rdiff --- mariadb-10.11.11/mysql-test/suite/innodb/r/restart,32k.rdiff 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/restart,32k.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '10485759' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 10485760 for innodb_page_size=32768 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '10485759' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/restart,4k.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/restart,4k.rdiff --- mariadb-10.11.11/mysql-test/suite/innodb/r/restart,4k.rdiff 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/restart,4k.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '2097151' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 2097152 for innodb_page_size=4096 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '2097151' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/restart,64k.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/restart,64k.rdiff --- mariadb-10.11.11/mysql-test/suite/innodb/r/restart,64k.rdiff 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/restart,64k.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '20971519' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 20971520 for innodb_page_size=65536 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '20971519' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/restart,8k.rdiff mariadb-10.11.13/mysql-test/suite/innodb/r/restart,8k.rdiff --- mariadb-10.11.11/mysql-test/suite/innodb/r/restart,8k.rdiff 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/restart,8k.rdiff 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '3145727' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 3145728 for innodb_page_size=8192 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '3145727' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/restart.result mariadb-10.11.13/mysql-test/suite/innodb/r/restart.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/restart.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/restart.result 2025-05-19 16:14:24.000000000 +0000 @@ -30,19 +30,6 @@ a DROP TABLE tr,tc,td; # -# MDEV-27467 innodb to enfore the minimum innodb_buffer_pool_size in SET (resize) the same as startup -# -SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; -SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); -ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -SHOW WARNINGS; -Level Code Message -Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE -Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); -SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; -# # MDEV-27882 Innodb - recognise MySQL-8.0 innodb flags and give a specific error message # FOUND 1 /InnoDB: MySQL-8\.0 tablespace in \./ibdata1/ in attempted_start.err diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/stat_tables.result mariadb-10.11.13/mysql-test/suite/innodb/r/stat_tables.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/stat_tables.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/stat_tables.result 2025-05-19 16:14:24.000000000 +0000 @@ -101,3 +101,13 @@ CREATE TABLE t1 (c1 INT) ENGINE=InnoDB STATS_PERSISTENT 1; DROP TABLE t1; # End of 10.6 tests +# +# MDEV-36373 Warning: ... 
persistent statistics storage is corrupted +# +CREATE TABLE t1 (c INT) ENGINE=InnoDB; +SET STATEMENT tx_read_only=1 FOR ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +DROP TABLE t1; +# End of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/r/stats_persistent.result mariadb-10.11.13/mysql-test/suite/innodb/r/stats_persistent.result --- mariadb-10.11.11/mysql-test/suite/innodb/r/stats_persistent.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/r/stats_persistent.result 2025-05-19 16:14:24.000000000 +0000 @@ -17,3 +17,13 @@ test.t1 analyze status OK SET DEBUG_SYNC= 'RESET'; DROP TABLE t1; +# +# MDEV-36649 dict_acquire_mdl_shared() aborts when table +# mode is DICT_TABLE_OP_OPEN_ONLY_IF_CACHED +# +set @old_defragment_stats_accuracy= @@innodb_defragment_stats_accuracy; +SET GLOBAL innodb_defragment_stats_accuracy=1; +CREATE TABLE t (a INT ) ENGINE=INNODB; +INSERT INTO t SELECT * FROM seq_1_to_1000; +DROP TABLE t; +set global innodb_defragment_stats_accuracy= @old_defragment_stats_accuracy; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/alter_copy_bulk.test mariadb-10.11.13/mysql-test/suite/innodb/t/alter_copy_bulk.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/alter_copy_bulk.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/alter_copy_bulk.test 2025-05-19 16:14:24.000000000 +0000 @@ -109,3 +109,24 @@ ALTER TABLE t1 FORCE, ALGORITHM=COPY; DROP TABLE t1; SET GLOBAL innodb_stats_persistent=@default_stats_persistent; + +--echo # +--echo # MDEV-36504 Memory leak after insert into empty table +--echo # +CREATE TABLE t1 (k INT PRIMARY KEY)ENGINE=InnoDB; +INSERT INTO t1 SET k= 1; +START TRANSACTION; +INSERT INTO t1 SET k= 2; +SELECT COUNT(*) > 0 FROM mysql.innodb_index_stats LOCK IN SHARE MODE; + +connect(con1,localhost,root,,,); +SET innodb_lock_wait_timeout=0; +--error ER_LOCK_WAIT_TIMEOUT +CREATE TABLE t2(f1 INT DEFAULT 1 PRIMARY KEY) + STATS_PERSISTENT= 1 ENGINE=InnoDB as SELECT k FROM t1; +disconnect con1; +connection default; +SET innodb_lock_wait_timeout=default; +DROP TABLE t1; +DROP TABLE IF EXISTS t2; +--source include/restart_mysqld.inc diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/alter_partitioned_debug.test mariadb-10.11.13/mysql-test/suite/innodb/t/alter_partitioned_debug.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/alter_partitioned_debug.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/alter_partitioned_debug.test 2025-05-19 16:14:24.000000000 +0000 @@ -4,6 +4,7 @@ --source include/have_debug_sync.inc CREATE TABLE t1 (a INT, b VARCHAR(10)) ENGINE=InnoDB +STATS_PERSISTENT=1 STATS_AUTO_RECALC=0 PARTITION BY RANGE(a) (PARTITION pa VALUES LESS THAN (3), PARTITION pb VALUES LESS THAN (5)); @@ -26,9 +27,46 @@ connection default; DELETE FROM t1; -disconnect ddl; SET DEBUG_SYNC = 'RESET'; CHECK TABLE t1; -DROP TABLE t1; + +CREATE TABLE t(a INT, b VARCHAR(10)) ENGINE=InnoDB +STATS_PERSISTENT=1 STATS_AUTO_RECALC=1; +RENAME TABLE t TO u; +DELETE FROM mysql.innodb_table_stats WHERE table_name='u'; +DELETE FROM mysql.innodb_index_stats WHERE table_name='u'; + +send SET STATEMENT debug_dbug='+d,dict_stats_save_exit_notify_and_wait' FOR +SELECT * FROM u; + +connection ddl; +SET DEBUG_SYNC='open_tables_after_open_and_process_table +WAIT_FOR dict_stats_save_finished'; +send ALTER TABLE t1 EXCHANGE PARTITION pb WITH TABLE u; + +connect sync,localhost,root; 
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where state = 'debug sync point: now'
+ and info like 'SET STATEMENT debug_dbug%SELECT * FROM u';
+--source include/wait_condition.inc
+let $wait_condition=
+ select count(*) = 1 from information_schema.processlist
+ where state = 'Waiting for table metadata lock'
+ and info like 'ALTER TABLE t1 EXCHANGE PARTITION pb WITH TABLE u';
+--source include/wait_condition.inc
+SET DEBUG_SYNC='now SIGNAL dict_stats_save_unblock';
+disconnect sync;
+
+connection default;
+reap;
+connection ddl;
+reap;
+disconnect ddl;
+connection default;
+SELECT * FROM u;
+SET DEBUG_SYNC = 'RESET';
+
+DROP TABLE t1,u;
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/autoinc_persist.test mariadb-10.11.13/mysql-test/suite/innodb/t/autoinc_persist.test
--- mariadb-10.11.11/mysql-test/suite/innodb/t/autoinc_persist.test 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/t/autoinc_persist.test 2025-05-19 16:14:24.000000000 +0000
@@ -95,15 +95,25 @@
 SELECT * FROM t10;
 eval CREATE TABLE t11(a FLOAT $AUTO_INCREMENT_KEY_a) ENGINE = InnoDB;
-INSERT INTO t11 VALUES(0), (0), (0), (0), (-1), (-10), (0),
-(20), (30), (31);
+INSERT INTO t11 VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
 SELECT * FROM t11;
+eval CREATE TABLE t11u(a FLOAT UNSIGNED $AUTO_INCREMENT_KEY_a) ENGINE = InnoDB;
+--error ER_WARN_DATA_OUT_OF_RANGE
+INSERT INTO t11u VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
+INSERT INTO t11u VALUES(0), (0), (0), (0), (0), (20), (30), (31);
+SELECT * FROM t11u;
+
 eval CREATE TABLE t12(a DOUBLE $AUTO_INCREMENT_KEY_a) ENGINE = InnoDB;
-INSERT INTO t12 VALUES(0), (0), (0), (0), (-1), (-10), (0),
-(20), (30), (31);
+INSERT INTO t12 VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
 SELECT * FROM t12;
+CREATE TABLE t12u(a DOUBLE UNSIGNED AUTO_INCREMENT KEY) ENGINE = InnoDB;
+--error ER_WARN_DATA_OUT_OF_RANGE
+INSERT INTO t12u VALUES(0), (0), (0), (0), (-1), (-10), (0), (20), (30), (31);
+INSERT INTO t12u VALUES(0), (0), (0), (0), (0), (20), (30), (31);
+SELECT * FROM t12u;
+
 --echo # Scenario 1: Normal restart, to test if the counters are persisted
 --echo # Scenario 2: Delete some values, to test the counters should not be the
 --echo # one which is the largest in current table
@@ -566,4 +576,5 @@
 SELECT MAX(b) AS `Expect 4` FROM t33;
 SELECT * FROM t33;
-DROP TABLE t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t30, t32, t33;
+DROP TABLE t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t11u, t12u,
+t30, t32, t33;
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt mariadb-10.11.13/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt
--- mariadb-10.11.11/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt 1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
---innodb-buffer-pool-size=8m --innodb-buffer-pool-chunk-size=1m
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/buf_pool_resize_oom.test mariadb-10.11.13/mysql-test/suite/innodb/t/buf_pool_resize_oom.test
--- mariadb-10.11.11/mysql-test/suite/innodb/t/buf_pool_resize_oom.test 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/t/buf_pool_resize_oom.test 1970-01-01 00:00:00.000000000 +0000
@@ -1,27 +0,0 @@
---source include/have_innodb.inc
---source include/have_debug.inc
---source include/not_embedded.inc
-
---echo #
---echo # Bug #21348684 SIGABRT DURING RESIZING THE INNODB BUFFER POOL
---echo # ONLINE WITH MEMORY FULL CONDITION
---echo #
-
-call mtr.add_suppression("InnoDB: failed to allocate the chunk array");
-
-SET GLOBAL debug_dbug='+d,buf_pool_resize_chunk_null';
-
---disable_warnings
-SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + 1048576;
---enable_warnings
-
-let $wait_timeout = 60;
-let $wait_condition =
- SELECT SUBSTR(variable_value, 1, 27) = 'Resizing buffer pool failed'
- FROM information_schema.global_status
- WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS';
-
---source include/wait_condition.inc
-# Restart the server, because the buffer pool would not necessarily be
-# shrunk afterwards even if we request it.
---source include/restart_mysqld.inc
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/doublewrite.combinations mariadb-10.11.13/mysql-test/suite/innodb/t/doublewrite.combinations
--- mariadb-10.11.11/mysql-test/suite/innodb/t/doublewrite.combinations 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/t/doublewrite.combinations 2025-05-19 16:14:24.000000000 +0000
@@ -1,7 +1,9 @@
 [strict_crc32]
 --innodb-checksum-algorithm=strict_crc32
 --innodb-use-atomic-writes=0
+--innodb-undo-tablespaces=0
 
 [strict_full_crc32]
 --innodb-checksum-algorithm=strict_full_crc32
 --innodb-use-atomic-writes=0
+--innodb-undo-tablespaces=0
diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/doublewrite.test mariadb-10.11.13/mysql-test/suite/innodb/t/doublewrite.test
--- mariadb-10.11.11/mysql-test/suite/innodb/t/doublewrite.test 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/innodb/t/doublewrite.test 2025-05-19 16:14:24.000000000 +0000
@@ -42,10 +42,17 @@
 SET GLOBAL innodb_fast_shutdown = 0;
 let $shutdown_timeout=;
 --source include/restart_mysqld.inc
+SET GLOBAL innodb_max_dirty_pages_pct_lwm=0,innodb_max_dirty_pages_pct=0;
+let $wait_condition =
+SELECT variable_value = 0
+FROM information_schema.global_status
+WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY';
+--source include/wait_condition.inc
+SET GLOBAL innodb_max_dirty_pages_pct=99;
 --source ../include/no_checkpoint_start.inc
 connect (dml,localhost,root,,);
 XA START 'x';
-insert into t1 values (6, repeat('%', @@innodb_page_size/2));
+insert into t1 values(6, repeat('%', @@innodb_page_size/2));
 XA END 'x';
 XA PREPARE 'x';
 disconnect dml;
@@ -53,10 +60,12 @@
 
 flush table t1 for export;
 
-let $restart_parameters=;
---let CLEANUP_IF_CHECKPOINT=XA COMMIT 'x';drop table t1;
+--let CLEANUP_IF_CHECKPOINT=drop table t1, unexpected_checkpoint;
 --source ../include/no_checkpoint_end.inc
 
+--copy_file $MYSQLD_DATADIR/ibdata1 $MYSQLD_DATADIR/ibdata1.bak
+--copy_file $MYSQLD_DATADIR/ib_logfile0 $MYSQLD_DATADIR/ib_logfile0.bak
+
 perl;
 use IO::Handle;
 do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
@@ -145,6 +154,12 @@
 --source include/shutdown_mysqld.inc
 let $shutdown_timeout=;
 # Corrupt the file in a better way.
+ +--remove_file $MYSQLD_DATADIR/ibdata1 +--remove_file $MYSQLD_DATADIR/ib_logfile0 +--move_file $MYSQLD_DATADIR/ibdata1.bak $MYSQLD_DATADIR/ibdata1 +--move_file $MYSQLD_DATADIR/ib_logfile0.bak $MYSQLD_DATADIR/ib_logfile0 + perl; use IO::Handle; my $fname= "$ENV{'MYSQLD_DATADIR'}test/t1.ibd"; @@ -157,22 +172,23 @@ close FILE; EOF --source include/start_mysqld.inc -XA ROLLBACK 'x'; check table t1; select f1, f2 from t1; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=0,innodb_max_dirty_pages_pct=0; +let $wait_condition = +SELECT variable_value = 0 +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; +--source include/wait_condition.inc +SET GLOBAL innodb_max_dirty_pages_pct=99; --source ../include/no_checkpoint_start.inc -connect (dml,localhost,root,,); -XA START 'x'; -insert into t1 values (6, repeat('%', @@innodb_page_size/2)); -XA END 'x'; -XA PREPARE 'x'; -disconnect dml; -connection default; - -flush table t1 for export; +XA ROLLBACK 'x'; +FLUSH TABLE t1 FOR EXPORT; -let $restart_parameters=; +# If we are skipping the test at this point due to an unexpected +# checkpoint, we will already have tested a part of this functionality. +--let CLEANUP_IF_CHECKPOINT=drop table t1; --source ../include/no_checkpoint_end.inc # Zero out the first page in file and try to recover from dblwr @@ -186,7 +202,6 @@ --source include/start_mysqld.inc let SEARCH_PATTERN=InnoDB: Recovered page \\[page id: space=[1-9][0-9]*, page number=[03]\\]; --source include/search_pattern_in_file.inc -XA ROLLBACK 'x'; check table t1; select f1, f2 from t1; drop table t1; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/foreign_key.test mariadb-10.11.13/mysql-test/suite/innodb/t/foreign_key.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/foreign_key.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/foreign_key.test 2025-05-19 16:14:24.000000000 +0000 @@ -133,7 +133,6 @@ --let $shutdown_timeout= disconnect incomplete; -SET @save_stats_persistent = @@GLOBAL.innodb_stats_persistent; SET GLOBAL innodb_stats_persistent = 0; INSERT INTO child SET a=0; @@ -1245,6 +1244,33 @@ DELETE FROM t1; DROP TABLE t2, t1; +--echo # +--echo # MDEV-33167 ASAN errors after failing to load foreign key +--echo # relation for the table +--echo # +call mtr.add_suppression("InnoDB: Load table `test`.`t3` failed, the table has missing foreign key indexes. Turn off 'foreign_key_checks' and try again."); +SET STATEMENT FOREIGN_KEY_CHECKS = 0 FOR +CREATE TABLE t1(f1 VARCHAR(8), + FOREIGN KEY(f1) REFERENCES test.t3(f1))ENGINE=InnoDB; + +SET STATEMENT FOREIGN_KEY_CHECKS = 0 FOR +CREATE TABLE t2(f1 VARCHAR(8), + FOREIGN KEY(f1) REFERENCES test.t3(f1)) + ENGINE=InnoDB DEFAULT CHARSET=utf8mb3; + +SET STATEMENT FOREIGN_KEY_CHECKS = 0 FOR +CREATE TABLE t3(f1 VARCHAR(8) PRIMARY KEY) + ENGINE=InnoDB DEFAULT CHARSET=latin1; + +set GLOBAL innodb_fast_shutdown=0; +--let $shutdown_timeout= +--source include/restart_mysqld.inc +# Error encountered while loading the foreign key +# constraint for t3. 
t1 wasn't loaded into memory yet +# t2 failed to find index for foreign key relation +ALTER TABLE t2 FORCE; +DROP TABLE t2, t1, t3; + --echo # End of 10.6 tests CREATE TABLE t1 @@ -1270,7 +1296,5 @@ ALTER TABLE t2 ADD FOREIGN KEY (f2) REFERENCES t2 (f2), ADD UNIQUE INDEX(f3); drop table t1, t2; -SET GLOBAL innodb_stats_persistent = @save_stats_persistent; --echo # End of 10.11 tests ---source include/wait_until_count_sessions.inc diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb-index-online.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-index-online.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb-index-online.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-index-online.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,5 @@ --loose-innodb-sort-buffer-size=64k --loose-innodb-online-alter-log-max-size=128k ---loose-innodb-buffer-pool-size=5M +--loose-innodb-buffer-pool-size=6M --loose-innodb-sys-indexes --loose-innodb-sys-fields diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb-index-online.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-index-online.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb-index-online.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-index-online.test 2025-05-19 16:14:24.000000000 +0000 @@ -510,12 +510,35 @@ connection con1; reap; -disconnect con1; connection default; SELECT * FROM t1; CHECK TABLE t1; DROP TABLE t1; + +--echo # +--echo # MDEV-36281 DML aborts during online virtual index +--echo # +CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY, f2 INT NOT NULL, + f3 INT NOT NULL, f4 INT AS (f3) VIRTUAL, + f5 INT AS (f1) VIRTUAL, INDEX(f4))ENGINE=InnoDB; +INSERT INTO t1(f1, f2, f3) VALUES(1, 2, 3); +SET DEBUG_SYNC = 'innodb_inplace_alter_table_enter SIGNAL dml_start WAIT_FOR dml_finish'; +send ALTER TABLE t1 ADD INDEX v1(f5, f2, f4), ADD INDEX v2(f3, f5); + +connection con1; +set DEBUG_SYNC="now WAIT_FOR dml_start"; +UPDATE t1 SET f3= f3 + 1; +set DEBUG_SYNC="now SIGNAL dml_finish"; + +disconnect con1; +connection default; +reap; +CHECK TABLE t1 EXTENDED; +SELECT f5, f2, f4 FROM t1 USE INDEX(v1); +SELECT f3, f5 FROM t1 USE INDEX(v2); +DROP TABLE t1; + SET DEBUG_SYNC = 'RESET'; # Check that all connections opened by test cases in this file are really diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb-table-online-master.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-table-online-master.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb-table-online-master.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb-table-online-master.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1 +1 @@ ---innodb-sort-buffer-size=64k --innodb-online-alter-log-max-size=512k --innodb-buffer-pool-size=5M +--innodb-sort-buffer-size=64k --innodb-online-alter-log-max-size=512k --innodb-buffer-pool-size=6M diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=16m diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test --- 
mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,6 +1,6 @@ --source include/have_innodb.inc --source include/have_debug.inc -call mtr.add_suppression("InnoDB: Cannot allocate memory for the buffer pool"); +call mtr.add_suppression("InnoDB: Cannot map innodb_buffer_pool_size_max="); call mtr.add_suppression("InnoDB: Plugin initialization aborted at srv0start.cc.*"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); @@ -10,5 +10,5 @@ let restart_parameters=--debug_dbug=+d,ib_buf_chunk_init_fails; --source include/restart_mysqld.inc let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; -let SEARCH_PATTERN=\[ERROR\] InnoDB: Cannot allocate memory for the buffer pool; +let SEARCH_PATTERN=\[ERROR\] InnoDB: Cannot map innodb_buffer_pool_size_max=16m; --source include/search_pattern_in_file.inc diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,2 +1,3 @@ --innodb-buffer-pool-size=8M +--innodb-buffer-pool-size-max=25M --innodb-page-size=4k diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,17 +1,13 @@ -# -# WL6117 : Resize the InnoDB Buffer Pool Online -# - --source include/have_innodb.inc ---source include/big_test.inc +--source include/have_sequence.inc -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; +--echo # +--echo # MDEV-29445: Reorganize buffer pool (and remove chunks) +--echo # --disable_query_log +call mtr.add_suppression("InnoDB: Over 67 percent of the buffer pool is occupied by lock heaps"); +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); set @old_innodb_buffer_pool_size = @@innodb_buffer_pool_size; set @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; --enable_query_log @@ -21,52 +17,63 @@ select @@innodb_buffer_pool_size; # Expand buffer pool +set global innodb_buffer_pool_size = 9437184; set global innodb_buffer_pool_size = 10485760; ---source include/wait_condition.inc - select @@innodb_buffer_pool_size; +let $kbs=`SELECT CAST(@@innodb_page_size / 1024 AS INT)`; # fill buffer pool --disable_query_log SET @save_innodb_read_only_compressed=@@GLOBAL.innodb_read_only_compressed; SET GLOBAL innodb_read_only_compressed=OFF; --enable_query_log -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; +create table t1 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED; +evalp create 
table t2 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$kbs; + +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t2 SELECT seq*4,seq*4 FROM seq_1_to_16384; -set @`v_id` := 0; -set @`v_val` := 0; - -# 2^18 == 262144 records -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; --disable_query_log SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; --enable_query_log -# Shrink buffer pool -set global innodb_buffer_pool_size = 64 * 1024 * 1024 + 512 * 1024; ---source include/wait_condition.inc - -select @@innodb_buffer_pool_size; +# Attempt to shrink the buffer pool. This may occasionally fail. +--error 0,ER_WRONG_USAGE +set global innodb_buffer_pool_size = 7340032; select count(val) from t1; +select count(val) from t2; set global innodb_adaptive_hash_index=OFF; -# Expand buffer pool to 24MB -set global innodb_buffer_pool_size = 25165824; ---source include/wait_condition.inc +# Expand buffer pool to 23 and then 24 MiB (requesting 25 MiB) +set global innodb_buffer_pool_size = 24117248; +set global innodb_buffer_pool_size = 26214400; select @@innodb_buffer_pool_size; select count(val) from t1; +select count(val) from t2; -drop table t1; -drop view view0; +drop table t1,t2; ---disable_query_log -set global innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; -set global innodb_buffer_pool_size = @old_innodb_buffer_pool_size; ---enable_query_log +SET GLOBAL innodb_max_purge_lag_wait = 0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; + +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +let $wait_condition = +SELECT variable_value = 0 +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; --source include/wait_condition.inc +SET GLOBAL innodb_buffer_pool_size = @old_innodb_buffer_pool_size; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ ---innodb-buffer-pool-chunk-size=1M ---loose-skip-innodb-disable-resize_buffer_pool_debug diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ ---source include/have_innodb.inc ---source include/big_test.inc - -SET 
@save_size=@@innodb_buffer_pool_size; - -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; - ---echo # ---echo # MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize ---echo # after or during DROP TABLE ---echo # - -select @@innodb_buffer_pool_chunk_size; -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; -SET GLOBAL innodb_buffer_pool_size=256*1024*1024; -DROP TABLE t1; ---source include/wait_condition.inc -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size; ---source include/wait_condition.inc - ---echo # End of 10.6 tests - -SET GLOBAL innodb_buffer_pool_size=@save_size; ---source include/wait_condition.inc diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ ---innodb-buffer-pool-size=8M --innodb-buffer-pool-chunk-size=2M diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=16m diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,24 +1,43 @@ --source include/have_innodb.inc --source include/have_sequence.inc --source include/have_debug.inc +--source include/have_debug_sync.inc SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; SET @save_size=@@GLOBAL.innodb_buffer_pool_size; SET GLOBAL innodb_limit_optimistic_insert_debug=2; - SET GLOBAL innodb_buffer_pool_size=16777216; CREATE TEMPORARY TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 SELECT seq FROM seq_1_to_200; -SET GLOBAL innodb_buffer_pool_size=8388608; +# Flush the buffer pool to prevent +# "innodb_buffer_pool_size change aborted" error with ./mtr --repeat=3 +SET GLOBAL innodb_max_purge_lag_wait=0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; + +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +connect con1,localhost,root; +SET DEBUG_SYNC='buf_pool_shrink_before_wakeup SIGNAL blocked WAIT_FOR go'; +send SET GLOBAL innodb_buffer_pool_size=8388608; +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +# adjust for 32-bit and SUX_LOCK_GENERIC +--replace_regex /(5..)\/\1/505\/505/ +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +SET DEBUG_SYNC='now SIGNAL go'; +connection con1; 
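# Note (a sketch, not part of the upstream patch): the lines above follow the
# standard two-connection DEBUG_SYNC handshake for observing a blocked
# statement deterministically, which this release uses in place of polling
# information_schema.processlist. Its general shape, with 'some_sync_point'
# as a placeholder (a real DEBUG_SYNC point compiled into a debug build,
# such as buf_pool_shrink_before_wakeup here):
#
#   connect (worker,localhost,root,,);
#   SET DEBUG_SYNC='some_sync_point SIGNAL reached WAIT_FOR resume';
#   send <statement that will hit the sync point>;
#   connection default;
#   SET DEBUG_SYNC='now WAIT_FOR reached';
#   # ... inspect server state while the worker is parked ...
#   SET DEBUG_SYNC='now SIGNAL resume';
#   connection worker;
#   reap;
#   connection default;
#   SET DEBUG_SYNC='RESET';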
+reap; +disconnect con1; +connection default; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +SET DEBUG_SYNC=RESET; -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; ---source include/wait_condition.inc +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; SELECT COUNT(*),MIN(a),MAX(a) FROM t1; DROP TEMPORARY TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ ---innodb-buffer-pool-size=16M ---innodb-buffer-pool-chunk-size=4M ---innodb-page-size=4k diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -# -# WL6117 : Resize the InnoDB Buffer Pool Online -# (innodb_buffer_pool_chunk_size used case) -# - ---source include/have_innodb.inc ---source include/big_test.inc - -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - ---disable_query_log -set @old_innodb_buffer_pool_size = @@innodb_buffer_pool_size; ---enable_query_log - -select @@innodb_buffer_pool_chunk_size; - -# fill buffer pool ---disable_query_log -SET @save_innodb_read_only_compressed=@@GLOBAL.innodb_read_only_compressed; -SET GLOBAL innodb_read_only_compressed=OFF; ---enable_query_log -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; - -set @`v_id` := 0; -set @`v_val` := 0; - -# 2^18 == 262144 records -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; ---disable_query_log -SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; ---enable_query_log - -# Shrink buffer pool to 7MB -set global innodb_buffer_pool_size = 7340032; ---source include/wait_condition.inc - -select count(val) from t1; - -# Expand buffer pool to 16MB -set global innodb_buffer_pool_size = 16777216; ---source include/wait_condition.inc - -select count(val) from t1; - -drop table t1; -drop view view0; - -# Try to shrink buffer pool to smaller than chunk size -set global innodb_buffer_pool_size = 2*1048576; ---source include/wait_condition.inc -select @@innodb_buffer_pool_size; - ---disable_query_log -set global innodb_buffer_pool_size = 
@old_innodb_buffer_pool_size; ---enable_query_log ---source include/wait_condition.inc diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_bug52663.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_bug52663.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_bug52663.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_bug52663.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,5 +1,7 @@ --source include/have_innodb.inc +SET @save_innodb_timeout=@@innodb_lock_wait_timeout; +SET GLOBAL innodb_lock_wait_timeout=1; set session transaction isolation level read committed; create table innodb_bug52663 (what varchar(5), id integer, count integer, primary key @@ -8,7 +10,6 @@ begin; connect (addconroot, localhost, root,,); -connection addconroot; set session transaction isolation level read committed; begin; @@ -32,3 +33,4 @@ connection default; select * from innodb_bug52663; drop table innodb_bug52663; +SET GLOBAL innodb_lock_wait_timeout=@save_innodb_timeout; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_row_lock_time_ms.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_row_lock_time_ms.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_row_lock_time_ms.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_row_lock_time_ms.test 2025-05-19 16:14:24.000000000 +0000 @@ -5,11 +5,26 @@ INSERT INTO t VALUES (1); -SET GLOBAL innodb_monitor_reset = "module_innodb"; +SET GLOBAL innodb_monitor_disable="lock_row_lock_time"; +SET GLOBAL innodb_monitor_disable="lock_row_lock_time_max"; +SET GLOBAL innodb_monitor_reset_all='lock_row_lock_time'; +SET GLOBAL innodb_monitor_reset_all='lock_row_lock_time_max'; +SET GLOBAL innodb_monitor_enable="lock_row_lock_time"; +SET GLOBAL innodb_monitor_enable="lock_row_lock_time_max"; BEGIN; SELECT * FROM t FOR UPDATE; +# We can't predict (innodb/lock)_row_lock_time_avg value, because it's counted +# as the whole waiting time divided by the amount of waits. The +# corresponding counters in lock_sys can't be reset with any query. + +--disable_result_log +SELECT @innodb_row_lock_time_before := variable_value + FROM information_schema.global_status + WHERE LOWER(variable_name) = 'innodb_row_lock_time'; +--enable_result_log + --connect(con1,localhost,root,,) SET innodb_lock_wait_timeout = 1; --error ER_LOCK_WAIT_TIMEOUT @@ -19,24 +34,28 @@ --connection default COMMIT; -SELECT variable_value > 100 FROM information_schema.global_status +SELECT variable_value - @innodb_row_lock_time_before > 100 + FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time'; -SELECT variable_value > 100 FROM information_schema.global_status +# We can't use 'variable_value - @innodb_row_lock_time_max_before' trick for +# innodb_row_lock_time_max, because we can't reset it, and we don't know the +# initial value at the moment of the test execution. 
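# Note (a sketch, not part of the upstream patch): the rewritten assertions
# above switch from absolute thresholds to a baseline-plus-delta check, the
# robust way to test cumulative status counters that cannot be reset and may
# carry values from earlier tests. Minimal form (the alias waited_over_100ms
# is illustrative; innodb_row_lock_time is reported in milliseconds):
#
#   SELECT @before := variable_value
#     FROM information_schema.global_status
#     WHERE LOWER(variable_name) = 'innodb_row_lock_time';
#   # ... provoke a lock wait of at least ~1s, e.g. with
#   # innodb_lock_wait_timeout=1 against a row locked by another session ...
#   SELECT variable_value - @before > 100 AS waited_over_100ms
#     FROM information_schema.global_status
#     WHERE LOWER(variable_name) = 'innodb_row_lock_time';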
+SELECT variable_value > 100 + FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_row_lock_time_max'; -SELECT variable_value > 100 FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_row_lock_time_avg'; - -SELECT count_reset > 100 FROM INFORMATION_SCHEMA.INNODB_METRICS - WHERE NAME="lock_row_lock_time"; -SELECT count_reset > 100 FROM INFORMATION_SCHEMA.INNODB_METRICS - WHERE NAME="lock_row_lock_time_max"; -SELECT count_reset > 100 FROM INFORMATION_SCHEMA.INNODB_METRICS - WHERE NAME="lock_row_lock_time_avg"; +SELECT count_reset > 100 + FROM INFORMATION_SCHEMA.INNODB_METRICS + WHERE NAME='lock_row_lock_time'; +SELECT count_reset > 100 + FROM INFORMATION_SCHEMA.INNODB_METRICS + WHERE NAME='lock_row_lock_time_max'; DROP TABLE t; --disable_warnings -SET GLOBAL innodb_monitor_reset=default; +SET GLOBAL innodb_monitor_enable=default; +SET GLOBAL innodb_monitor_disable=default; +SET GLOBAL innodb_monitor_reset_all=default; --enable_warnings --source include/wait_until_count_sessions.inc diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_stats_auto_recalc_on_nonexistent.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_stats_auto_recalc_on_nonexistent.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_stats_auto_recalc_on_nonexistent.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_stats_auto_recalc_on_nonexistent.test 2025-05-19 16:14:24.000000000 +0000 @@ -17,9 +17,7 @@ -- eval $check_stats1 -- eval $check_stats2 -# open and close the table SELECT * FROM t; -FLUSH TABLE t; DELETE FROM mysql.innodb_index_stats WHERE table_name = 't'; DELETE FROM mysql.innodb_table_stats WHERE table_name = 't'; @@ -27,7 +25,8 @@ -- eval $check_stats1 -- eval $check_stats2 -# open the table, causing stats recalc/save +# rename and open the table, causing stats recalc/save +RENAME TABLE t TO tmp, tmp TO t; SELECT * FROM t; -- eval $check_stats1 @@ -43,9 +42,7 @@ -- eval $check_stats1 -- eval $check_stats2 -# open and close the table SELECT * FROM t; -FLUSH TABLE t; DELETE FROM mysql.innodb_index_stats WHERE table_name = 't'; DELETE FROM mysql.innodb_table_stats WHERE table_name = 't'; @@ -53,7 +50,7 @@ -- eval $check_stats1 -- eval $check_stats2 -# open the table, causing stats recalc/save +RENAME TABLE t TO tmp, tmp TO t; SELECT * FROM t; -- eval $check_stats1 @@ -69,9 +66,7 @@ -- eval $check_stats1 -- eval $check_stats2 -# open and close the table SELECT * FROM t; -FLUSH TABLE t; DELETE FROM mysql.innodb_index_stats WHERE table_name = 't'; DELETE FROM mysql.innodb_table_stats WHERE table_name = 't'; @@ -79,7 +74,8 @@ -- eval $check_stats1 -- eval $check_stats2 -# open the table, stats should not be present, since autorecalc is disabled +# rename the table, stats should not be present, since autorecalc is disabled +RENAME TABLE t TO tmp, tmp TO t; SELECT * FROM t; -- eval $check_stats1 diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_stats_fetch.test mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_stats_fetch.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/innodb_stats_fetch.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/innodb_stats_fetch.test 2025-05-19 16:14:24.000000000 +0000 @@ -69,7 +69,7 @@ index_name = 'idx' AND stat_name = 'n_diff_pfx02'; -FLUSH TABLE test_ps_fetch; +RENAME TABLE test_ps_fetch TO tmp, tmp TO test_ps_fetch; SELECT seq_in_index, column_name, cardinality FROM information_schema.statistics WHERE table_name = 
'test_ps_fetch' diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/instant_alter_debug.test mariadb-10.11.13/mysql-test/suite/innodb/t/instant_alter_debug.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/instant_alter_debug.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/instant_alter_debug.test 2025-05-19 16:14:24.000000000 +0000 @@ -657,11 +657,19 @@ SET DEBUG_SYNC=RESET; --echo # End of 10.5 tests - SELECT variable_value-@old_instant instants FROM information_schema.global_status WHERE variable_name = 'innodb_instant_alter_column'; -SET GLOBAL innodb_stats_persistent = @save_stats_persistent; +CREATE TABLE t1(f1 INT, f2 TEXT)ENGINE=InnoDB; +INSERT INTO t1 VALUES(1, 'a'); +ALTER TABLE t1 ADD COLUMN f3 TEXT FIRST; +--error ER_INTERNAL_ERROR +SET STATEMENT DEBUG_DBUG="+d,instant_insert_fail" FOR +ALTER TABLE t1 DROP COLUMN f1; +ALTER TABLE t1 DROP COLUMN f1; +CHECK TABLE t1; +DROP TABLE t1; +SET GLOBAL innodb_stats_persistent = @save_stats_persistent; --echo # End of 10.6 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/lock_isolation.test mariadb-10.11.13/mysql-test/suite/innodb/t/lock_isolation.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/lock_isolation.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/lock_isolation.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,9 +1,16 @@ --source include/have_innodb.inc +--source include/count_sessions.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc --disable_query_log call mtr.add_suppression("InnoDB: Transaction was aborted due to "); --enable_query_log +--connect disable_purging,localhost,root +START TRANSACTION WITH CONSISTENT SNAPSHOT; + +--connection default --echo # --echo # MDEV-26642 Weird SELECT view when a record is --echo # modified to the same value by two transactions @@ -41,22 +48,18 @@ --echo # READ UNCOMMITTED and READ COMMITTED isolation level --echo # -CREATE TABLE t(a INT, b INT) ENGINE=InnoDB; +CREATE TABLE t(a INT, b INT) ENGINE=InnoDB STATS_PERSISTENT=0; INSERT INTO t VALUES(NULL, 1), (2, 2); SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; BEGIN; UPDATE t SET a = 10; --connection consistent SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; --send UPDATE t SET b = 20 WHERE a --connection default -let $wait_condition= - select count(*) = 1 from information_schema.processlist - where state = 'Updating' - and info = 'UPDATE t SET b = 20 WHERE a'; ---source include/wait_condition.inc - +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; COMMIT; --connection consistent @@ -70,14 +73,11 @@ --connection consistent SET TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; --send UPDATE t SET b = 20 WHERE a --connection default -let $wait_condition= - select count(*) = 1 from information_schema.processlist - where info = 'UPDATE t SET b = 20 WHERE a'; ---source include/wait_condition.inc - +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; COMMIT; --connection consistent @@ -91,15 +91,11 @@ --connection con_weird SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; send UPDATE t SET b = 20 WHERE a; --connection default -let $wait_condition= - select count(*) = 1 from information_schema.processlist - where state = 'Updating' - and info = 'UPDATE t SET b = 20 WHERE a'; ---source include/wait_condition.inc - +SET DEBUG_SYNC="now WAIT_FOR 
select_blocked"; SELECT * FROM t; COMMIT; @@ -123,14 +119,11 @@ BEGIN; # As semi-consistent read is disabled for innodb_snapshot_isolation=ON, the # following UPDATE must be blocked on the first record. +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; --send UPDATE t SET b = 2 WHERE a --connection default -let $wait_condition= - select count(*) = 1 from information_schema.processlist - where state = 'Updating' and info = 'UPDATE t SET b = 2 WHERE a'; ---source include/wait_condition.inc - +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; UPDATE t SET a = 1; COMMIT; --connection consistent @@ -149,13 +142,15 @@ --echo # MDEV-33802 Weird read view after ROLLBACK of other transactions --echo # -CREATE TABLE t(a INT PRIMARY KEY, b INT UNIQUE) ENGINE=InnoDB; -INSERT INTO t SET a=1; - -BEGIN; INSERT INTO t SET a=2; +CREATE TABLE t(a INT PRIMARY KEY, b INT UNIQUE) ENGINE=InnoDB STATS_PERSISTENT=0; --connection consistent START TRANSACTION WITH CONSISTENT SNAPSHOT; + +--connection default +INSERT INTO t SET a=1; + +--connection consistent SAVEPOINT sp1; --disable_ps2_protocol --error ER_CHECKREAD @@ -163,29 +158,100 @@ --enable_ps2_protocol SAVEPOINT sp1; +--connection default +BEGIN; INSERT INTO t SET a=2; + --connection con_weird START TRANSACTION WITH CONSISTENT SNAPSHOT; -send -SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; +--send SELECT * FROM t FORCE INDEX (b) FOR UPDATE --connection default -let $wait_condition= - select count(*) = 1 from information_schema.processlist - where state = 'Sending data' - and info LIKE 'SELECT * FROM t %'; ---source include/wait_condition.inc +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; ROLLBACK; --connection con_weird --reap SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +COMMIT; --disconnect con_weird --connection consistent SELECT * FROM t FORCE INDEX (b) FOR UPDATE; +COMMIT; + +--connection default +TRUNCATE TABLE t; + +--echo # +--echo # MDEV-36639 innodb_snapshot_isolation=1 gives error for not comitted row changes +--echo # +INSERT INTO t VALUES (1,1),(2,2); + +--connection default +--echo # Case 1: Transaction A modifies a record, transaction B with snapshot +--echo # isolation level is blocked by A, then A is committed. +--echo # Expected behaviour: B gets ER_CHECKREAD. +BEGIN; +UPDATE t SET b=3 WHERE a = 1; + +--connection consistent +SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; +BEGIN; +SELECT * FROM t; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; +--send SELECT * FROM t WHERE a=1 FOR UPDATE + +--connection default +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; +COMMIT; + +--connection consistent +--error ER_CHECKREAD +--reap + +--echo # Case 2: Transaction A modifies a record, transaction B with snapshot +--echo # isolation level is blocked by A, then A is rolled back. +--echo # Expected behaviour: B continues execution. + +--connection default +BEGIN; +UPDATE t SET b=4 WHERE a=1; + +--connection consistent +BEGIN; +SELECT * FROM t; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL select_blocked"; +--send SELECT * FROM t WHERE a=1 FOR UPDATE + +--connection default +SET DEBUG_SYNC="now WAIT_FOR select_blocked"; +ROLLBACK; + +--connection consistent +--reap +ROLLBACK; + +--echo # Case 3: Transaction B with snapshot isolation level started with +--echo # consistent snapshot. Transaction A modifies a record and is committed. +--echo # Both B tries to read modified by A record. +--echo # Expected behavior: B gets ER_CHECKREAD. 
+ +--connection consistent +START TRANSACTION WITH CONSISTENT SNAPSHOT; + +--connection default +UPDATE t SET b=4 WHERE a=1; + +--connection consistent +--error ER_CHECKREAD +SELECT * FROM t WHERE a=1 FOR UPDATE; --disconnect consistent +--disconnect disable_purging --connection default +SET DEBUG_SYNC="RESET"; DROP TABLE t; +--source include/wait_until_count_sessions.inc --echo # End of 10.6 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/lock_memory_debug.opt mariadb-10.11.13/mysql-test/suite/innodb/t/lock_memory_debug.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/lock_memory_debug.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/lock_memory_debug.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1 +1 @@ ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/lock_memory_debug.test mariadb-10.11.13/mysql-test/suite/innodb/t/lock_memory_debug.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/lock_memory_debug.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/lock_memory_debug.test 2025-05-19 16:14:24.000000000 +0000 @@ -15,7 +15,7 @@ --error ER_LOCK_TABLE_FULL SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR -INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000; +INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g; SELECT COUNT(*) FROM t1; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/log_upgrade_101_flags.test mariadb-10.11.13/mysql-test/suite/innodb/t/log_upgrade_101_flags.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/log_upgrade_101_flags.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/log_upgrade_101_flags.test 2025-05-19 16:14:24.000000000 +0000 @@ -73,7 +73,7 @@ close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=10M +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/mdev-15707.opt mariadb-10.11.13/mysql-test/suite/innodb/t/mdev-15707.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/mdev-15707.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/mdev-15707.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1 +1 @@ ---innodb --innodb-buffer-pool-size=5MB --innodb-read-io-threads=1 --innodb-doublewrite=0 --innodb-flush-log-at-trx-commit=0 \ No newline at end of file +--innodb --innodb-buffer-pool-size=6MB --innodb-read-io-threads=1 --innodb-doublewrite=0 --innodb-flush-log-at-trx-commit=0 \ No newline at end of file diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/mem_pressure.opt mariadb-10.11.13/mysql-test/suite/innodb/t/mem_pressure.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/mem_pressure.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/mem_pressure.opt 2025-05-19 16:14:24.000000000 +0000 @@ -0,0 +1,3 @@ +--loose-innodb-buffer-pool-size-auto-min=17m +--innodb-buffer-pool-size-max=17m +--innodb-buffer-pool-size=17m diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/mem_pressure.test mariadb-10.11.13/mysql-test/suite/innodb/t/mem_pressure.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/mem_pressure.test 
2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/mem_pressure.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,8 +1,8 @@ --source include/have_debug.inc ---source include/have_cgroupv2.inc --source include/not_embedded.inc --source include/have_innodb.inc --source include/have_sequence.inc +--source include/word_size.inc --echo # --echo # MDEV-24670 avoid OOM by linux kernel co-operative memory management @@ -15,6 +15,13 @@ # This is not an actual parameter, so there is no need to restore it. set GLOBAL innodb_max_purge_lag_wait=0; +SET @innodb_buffer_pool_size= @@GLOBAL.innodb_buffer_pool_size; +SET @innodb_buffer_pool_size_min= @@GLOBAL.innodb_buffer_pool_size_auto_min; +SELECT +@@GLOBAL.innodb_buffer_pool_size, +@@GLOBAL.innodb_buffer_pool_size_auto_min, +@@GLOBAL.innodb_buffer_pool_size_max; + CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; SET GLOBAL innodb_limit_optimistic_insert_debug=2; SET STATEMENT unique_checks=0, foreign_key_checks=0 FOR @@ -24,32 +31,31 @@ DROP TABLE t1; ---disable_cursor_protocol -SELECT CAST(VARIABLE_VALUE AS INTEGER) INTO @dirty_prev -FROM INFORMATION_SCHEMA.GLOBAL_STATUS -WHERE VARIABLE_NAME='Innodb_buffer_pool_pages_dirty'; ---enable_cursor_protocol - -set debug_dbug="d,trigger_garbage_collection"; -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size; +SET STATEMENT debug_dbug="d,trigger_garbage_collection" FOR +SET GLOBAL innodb_buffer_pool_size=@innodb_buffer_pool_size; let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; -# either a fail or the pressure event -let SEARCH_PATTERN= [Mm]emory pressure.*; +let SEARCH_PATTERN= Memory pressure event disregarded.*; +let SEARCH_WAIT= FOUND; --source include/search_pattern_in_file.inc +SET STATEMENT debug_dbug="d,trigger_garbage_collection" FOR +SET GLOBAL innodb_buffer_pool_size_auto_min= +CAST(@innodb_buffer_pool_size/2 AS UNSIGNED), +innodb_buffer_pool_size=@innodb_buffer_pool_size; + # The garbage collection happens asynchronously after trigger, in a background # thread. So wait for it to happen to avoid sporadic failure. 
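# Note (a sketch, not part of the upstream patch): as this test reads, the
# debug injection d,trigger_garbage_collection simulates a kernel memory
# pressure event, which asks InnoDB to shrink the buffer pool asynchronously,
# and innodb_buffer_pool_size_auto_min appears to act as the floor below
# which such automatic shrinking will not go. The knobs involved, with
# illustrative sizes:
#
#   SET GLOBAL innodb_buffer_pool_size_auto_min = 8*1024*1024;
#   # a pressure event may now shrink @@GLOBAL.innodb_buffer_pool_size
#   # toward that floor; restore it explicitly afterwards:
#   SET GLOBAL innodb_buffer_pool_size = 17*1024*1024;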
let $wait_condition= - SELECT CAST(VARIABLE_VALUE AS INTEGER) < @dirty_prev AS LESS_DIRTY_IS_GOOD - FROM INFORMATION_SCHEMA.GLOBAL_STATUS - WHERE VARIABLE_NAME='Innodb_buffer_pool_pages_dirty'; + select @@global.innodb_buffer_pool_size < @innodb_buffer_pool_size; --source include/wait_condition.inc eval $wait_condition; -let SEARCH_PATTERN= InnoDB: Memory pressure event freed.*; +let SEARCH_PATTERN= InnoDB: Memory pressure event shrunk.*; let SEARCH_WAIT= FOUND; --source include/search_pattern_in_file.inc set debug_dbug=@save_dbug; +SET GLOBAL innodb_buffer_pool_size= @innodb_buffer_pool_size; +SET GLOBAL innodb_buffer_pool_size_auto_min=@innodb_buffer_pool_size_min; --echo # End of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/page_cleaner.test mariadb-10.11.13/mysql-test/suite/innodb/t/page_cleaner.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/page_cleaner.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/page_cleaner.test 2025-05-19 16:14:24.000000000 +0000 @@ -7,6 +7,12 @@ SET GLOBAL innodb_max_dirty_pages_pct_lwm=0.0; SET GLOBAL innodb_max_dirty_pages_pct=0.0; +CREATE TABLE t(a INT) ENGINE=InnoDB STATS_PERSISTENT=0; +--connect (prevent_purge,localhost,root) +START TRANSACTION WITH CONSISTENT SNAPSHOT; +--connection default +SET GLOBAL innodb_max_purge_lag_wait=0; + let $wait_condition = SELECT variable_value = 0 FROM information_schema.global_status @@ -15,7 +21,24 @@ SET GLOBAL innodb_max_dirty_pages_pct=90.0; -CREATE TABLE t ENGINE=InnoDB SELECT * FROM seq_1_to_10000; +--disable_cursor_protocol +SELECT variable_value INTO @log_writes FROM information_schema.global_status +WHERE variable_name='innodb_log_writes'; +--enable_cursor_protocol + +BEGIN; +--disable_query_log +let $N=500; +while ($N) { + INSERT INTO t SELECT * FROM seq_1_to_10; + dec $N; +} +--enable_query_log +ROLLBACK; + +SELECT if(variable_value-@log_writes<500,'ok',variable_value-@log_writes) +FROM information_schema.global_status WHERE variable_name='innodb_log_writes'; +--disconnect prevent_purge SELECT variable_value>0 FROM information_schema.global_status WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/purge_secondary.opt mariadb-10.11.13/mysql-test/suite/innodb/t/purge_secondary.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/purge_secondary.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/purge_secondary.opt 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,4 @@ --innodb-sys-tablestats ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M --innodb_monitor_enable=module_buffer --skip-innodb-stats-persistent diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/recovery_memory.test mariadb-10.11.13/mysql-test/suite/innodb/t/recovery_memory.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/recovery_memory.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/recovery_memory.test 2025-05-19 16:14:24.000000000 +0000 @@ -22,7 +22,7 @@ connection default; sleep 10; let $shutdown_timeout=0; -let $restart_parameters=--innodb_buffer_pool_size=5242880; +let $restart_parameters=--innodb_buffer_pool_size=6m; --source include/restart_mysqld.inc DROP TABLE t1; DROP PROCEDURE dorepeat; @@ -33,11 +33,11 @@ --echo # if ($have_debug) { SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; -let $restart_parameters=--innodb_buffer_pool_size=5242880 --debug_dbug=+d,ibuf_init_corrupt; +let $restart_parameters=--innodb_buffer_pool_size=6m 
--debug_dbug=+d,ibuf_init_corrupt; } if (!$have_debug) { --echo SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; -let $restart_parameters=--innodb_buffer_pool_size=5242880; +let $restart_parameters=--innodb_buffer_pool_size=6m; } CREATE TABLE t1(f1 INT NOT NULL)ENGINE=InnoDB; INSERT INTO t1 SELECT * FROM seq_1_to_65536; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/restart.opt mariadb-10.11.13/mysql-test/suite/innodb/t/restart.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/restart.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/restart.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ ---loose-innodb_disable_resize_buffer_pool_debug=0 ---innodb-buffer-pool-chunk-size=1M diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/restart.test mariadb-10.11.13/mysql-test/suite/innodb/t/restart.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/restart.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/restart.test 2025-05-19 16:14:24.000000000 +0000 @@ -93,31 +93,6 @@ DROP TABLE tr,tc,td; --echo # ---echo # MDEV-27467 innodb to enfore the minimum innodb_buffer_pool_size in SET (resize) the same as startup ---echo # - -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - ---disable_cursor_protocol -SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; -SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; ---enable_cursor_protocol ---error ER_WRONG_VALUE_FOR_VAR -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); - -SHOW WARNINGS; - -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - ---source include/wait_condition.inc - -SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; - ---echo # --echo # MDEV-27882 Innodb - recognise MySQL-8.0 innodb flags and give a specific error message --echo # diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/stat_tables.test mariadb-10.11.13/mysql-test/suite/innodb/t/stat_tables.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/stat_tables.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/stat_tables.test 2025-05-19 16:14:24.000000000 +0000 @@ -110,3 +110,12 @@ DROP TABLE t1; --echo # End of 10.6 tests + +--echo # +--echo # MDEV-36373 Warning: ... 
persistent statistics storage is corrupted +--echo # +CREATE TABLE t1 (c INT) ENGINE=InnoDB; +SET STATEMENT tx_read_only=1 FOR ANALYZE TABLE t1; +DROP TABLE t1; + +--echo # End of 10.11 tests diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/stats_persistent.test mariadb-10.11.13/mysql-test/suite/innodb/t/stats_persistent.test --- mariadb-10.11.11/mysql-test/suite/innodb/t/stats_persistent.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/stats_persistent.test 2025-05-19 16:14:24.000000000 +0000 @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/have_sequence.inc --source include/have_debug.inc --source include/have_debug_sync.inc --source include/count_sessions.inc @@ -26,3 +27,14 @@ DROP TABLE t1; --source include/wait_until_count_sessions.inc + +--echo # +--echo # MDEV-36649 dict_acquire_mdl_shared() aborts when table +--echo # mode is DICT_TABLE_OP_OPEN_ONLY_IF_CACHED +--echo # +set @old_defragment_stats_accuracy= @@innodb_defragment_stats_accuracy; +SET GLOBAL innodb_defragment_stats_accuracy=1; +CREATE TABLE t (a INT ) ENGINE=INNODB; +INSERT INTO t SELECT * FROM seq_1_to_1000; +DROP TABLE t; +set global innodb_defragment_stats_accuracy= @old_defragment_stats_accuracy; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb/t/update_time-master.opt mariadb-10.11.13/mysql-test/suite/innodb/t/update_time-master.opt --- mariadb-10.11.11/mysql-test/suite/innodb/t/update_time-master.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb/t/update_time-master.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ ---innodb-buffer-pool-size=5M diff -Nru mariadb-10.11.11/mysql-test/suite/innodb_fts/r/index_table.result mariadb-10.11.13/mysql-test/suite/innodb_fts/r/index_table.result --- mariadb-10.11.11/mysql-test/suite/innodb_fts/r/index_table.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb_fts/r/index_table.result 2025-05-19 16:14:24.000000000 +0000 @@ -5,6 +5,9 @@ title VARCHAR(200), content TEXT ) ENGINE= InnoDB; +SET STATEMENT debug_dbug='+d,innodb_report_deadlock' FOR +CREATE FULLTEXT INDEX idx ON articles (title, content); +ERROR HY000: Got error 11 "Resource temporarily unavailable" from storage engine InnoDB CREATE FULLTEXT INDEX idx ON articles (title, content); INSERT INTO articles (title, content) VALUES ('MySQL Tutorial','DBMS stands for MySQL DataBase ...'), diff -Nru mariadb-10.11.11/mysql-test/suite/innodb_fts/r/innodb_ft_aux_table.result mariadb-10.11.13/mysql-test/suite/innodb_fts/r/innodb_ft_aux_table.result --- mariadb-10.11.11/mysql-test/suite/innodb_fts/r/innodb_ft_aux_table.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb_fts/r/innodb_ft_aux_table.result 2025-05-19 16:14:24.000000000 +0000 @@ -118,4 +118,13 @@ SELECT @@GLOBAL.innodb_ft_aux_table; @@GLOBAL.innodb_ft_aux_table test/t1 +CREATE TABLE t(a INT) ENGINE=InnoDB; +SET GLOBAL innodb_ft_aux_table='test/t'; +ERROR 42000: Variable 'innodb_ft_aux_table' can't be set to the value of 'test/t' +DROP TABLE t; +SET GLOBAL innodb_ft_aux_table='test/t'; +ERROR 42000: Variable 'innodb_ft_aux_table' can't be set to the value of 'test/t' +SELECT @@GLOBAL.innodb_ft_aux_table; +@@GLOBAL.innodb_ft_aux_table +test/t1 SET GLOBAL innodb_ft_aux_table = @save_ft_aux_table; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb_fts/t/index_table.test mariadb-10.11.13/mysql-test/suite/innodb_fts/t/index_table.test --- mariadb-10.11.11/mysql-test/suite/innodb_fts/t/index_table.test 
2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb_fts/t/index_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -3,6 +3,9 @@ -- source include/have_innodb.inc -- source include/have_debug.inc +--disable_query_log +call mtr.add_suppression("InnoDB: \\(Deadlock\\) writing `use_stopword'"); +--enable_query_log SET @optimize=@@GLOBAL.INNODB_OPTIMIZE_FULLTEXT_ONLY; SET GLOBAL INNODB_OPTIMIZE_FULLTEXT_ONLY=1; @@ -14,6 +17,9 @@ content TEXT ) ENGINE= InnoDB; +--error ER_GET_ERRNO +SET STATEMENT debug_dbug='+d,innodb_report_deadlock' FOR +CREATE FULLTEXT INDEX idx ON articles (title, content); CREATE FULLTEXT INDEX idx ON articles (title, content); INSERT INTO articles (title, content) VALUES diff -Nru mariadb-10.11.11/mysql-test/suite/innodb_fts/t/innodb_ft_aux_table.test mariadb-10.11.13/mysql-test/suite/innodb_fts/t/innodb_ft_aux_table.test --- mariadb-10.11.11/mysql-test/suite/innodb_fts/t/innodb_ft_aux_table.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb_fts/t/innodb_ft_aux_table.test 2025-05-19 16:14:24.000000000 +0000 @@ -41,4 +41,13 @@ SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE; SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_CONFIG; SELECT @@GLOBAL.innodb_ft_aux_table; + +CREATE TABLE t(a INT) ENGINE=InnoDB; +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_ft_aux_table='test/t'; +DROP TABLE t; +--error ER_WRONG_VALUE_FOR_VAR +SET GLOBAL innodb_ft_aux_table='test/t'; +SELECT @@GLOBAL.innodb_ft_aux_table; + SET GLOBAL innodb_ft_aux_table = @save_ft_aux_table; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb_gis/r/rollback.result mariadb-10.11.13/mysql-test/suite/innodb_gis/r/rollback.result --- mariadb-10.11.11/mysql-test/suite/innodb_gis/r/rollback.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb_gis/r/rollback.result 2025-05-19 16:14:25.000000000 +0000 @@ -412,3 +412,16 @@ ERROR HY000: Lost connection to server during query insert into t1 values(5, point(5,5), point(5,5), 5); drop table t1; +# +# MDEV-35420 Server aborts while deleting the record +# in spatial index +# +CREATE TABLE t1 (c POINT NOT NULL, SPATIAL(c)) engine=InnoDB; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SET STATEMENT unique_checks=0,foreign_key_checks=0 FOR +START TRANSACTION; +INSERT INTO t1 SELECT ST_GeomFromText('POINT(114368751 656950466)') FROM seq_1_to_512; +ROLLBACK; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/innodb_gis/t/rollback.test mariadb-10.11.13/mysql-test/suite/innodb_gis/t/rollback.test --- mariadb-10.11.11/mysql-test/suite/innodb_gis/t/rollback.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb_gis/t/rollback.test 2025-05-19 16:14:25.000000000 +0000 @@ -8,6 +8,7 @@ # Avoid CrashReporter popup on Mac --source include/not_crashrep.inc --source include/have_innodb_16k.inc +--source include/have_sequence.inc CREATE TABLE t4 (id bigint(12) unsigned NOT NULL auto_increment, c2 varchar(15) collate utf8_bin default NULL, @@ -475,3 +476,15 @@ insert into t1 values(5, point(5,5), point(5,5), 5); drop table t1; + +--echo # +--echo # MDEV-35420 Server aborts while deleting the record +--echo # in spatial index +--echo # +CREATE TABLE t1 (c POINT NOT NULL, SPATIAL(c)) engine=InnoDB; +CHECK TABLE t1; +SET STATEMENT unique_checks=0,foreign_key_checks=0 FOR +START TRANSACTION; +INSERT INTO t1 SELECT ST_GeomFromText('POINT(114368751 656950466)') FROM seq_1_to_512; +ROLLBACK; +DROP TABLE t1; diff -Nru 
mariadb-10.11.11/mysql-test/suite/innodb_gis/t/rtree_purge.test mariadb-10.11.13/mysql-test/suite/innodb_gis/t/rtree_purge.test --- mariadb-10.11.11/mysql-test/suite/innodb_gis/t/rtree_purge.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/innodb_gis/t/rtree_purge.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,5 +1,6 @@ # This test case will test R-tree purge. +--source include/long_test.inc --source include/innodb_page_size.inc --source include/have_sequence.inc --source include/not_valgrind.inc diff -Nru mariadb-10.11.11/mysql-test/suite/json/r/json_no_table.result mariadb-10.11.13/mysql-test/suite/json/r/json_no_table.result --- mariadb-10.11.11/mysql-test/suite/json/r/json_no_table.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/json/r/json_no_table.result 2025-05-19 16:14:25.000000000 +0000 @@ -2886,7 +2886,7 @@ ["a", "b", "c"] select charset(json_unquote('"abc"')); charset(json_unquote('"abc"')) -utf8mb3 +utf8mb4 select json_quote(convert(X'e68891' using utf8)); json_quote(convert(X'e68891' using utf8)) "我" diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/full_backup.result mariadb-10.11.13/mysql-test/suite/mariabackup/full_backup.result --- mariadb-10.11.11/mysql-test/suite/mariabackup/full_backup.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/full_backup.result 2025-05-19 16:14:25.000000000 +0000 @@ -1,7 +1,12 @@ CREATE TABLE t(i INT) ENGINE INNODB; INSERT INTO t VALUES(1); +SET GLOBAL innodb_max_purge_lag_wait=0; # xtrabackup backup NOT FOUND /InnoDB: Allocated tablespace ID/ in backup.log +SELECT variable_value FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; +variable_value +0 INSERT INTO t VALUES(2); # xtrabackup prepare # shutdown server diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/full_backup.test mariadb-10.11.13/mysql-test/suite/mariabackup/full_backup.test --- mariadb-10.11.11/mysql-test/suite/mariabackup/full_backup.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/full_backup.test 2025-05-19 16:14:25.000000000 +0000 @@ -2,6 +2,7 @@ CREATE TABLE t(i INT) ENGINE INNODB; INSERT INTO t VALUES(1); +SET GLOBAL innodb_max_purge_lag_wait=0; echo # xtrabackup backup; let $targetdir=$MYSQLTEST_VARDIR/tmp/backup; --let $backup_log=$MYSQLTEST_VARDIR/tmp/backup.log @@ -18,6 +19,8 @@ --source include/search_pattern_in_file.inc --remove_file $backup_log +SELECT variable_value FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; INSERT INTO t VALUES(2); diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/incremental_compressed.result mariadb-10.11.13/mysql-test/suite/mariabackup/incremental_compressed.result --- mariadb-10.11.11/mysql-test/suite/mariabackup/incremental_compressed.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/incremental_compressed.result 2025-05-19 16:14:25.000000000 +0000 @@ -4,6 +4,9 @@ # CREATE TABLE t (pk INT PRIMARY KEY) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; ALTER TABLE t PARTITION BY KEY(pk); +# Incremental backup +# Prepare fullbackup +# Prepare incremental backup # shutdown server # remove datadir # xtrabackup move back diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/incremental_compressed.test mariadb-10.11.13/mysql-test/suite/mariabackup/incremental_compressed.test --- mariadb-10.11.11/mysql-test/suite/mariabackup/incremental_compressed.test 2025-01-30 
11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/incremental_compressed.test 2025-05-19 16:14:25.000000000 +0000 @@ -16,12 +16,18 @@ ALTER TABLE t PARTITION BY KEY(pk); +--echo # Incremental backup --exec $XTRABACKUP --backup --target-dir=$incremental_dir --incremental-basedir=$basedir --protocol=tcp --port=$MASTER_MYPORT --user=root > $incremental_dir.log 2>&1 +--echo # Prepare fullbackup --exec $XTRABACKUP --prepare --target-dir=$basedir --user=root > $MYSQL_TMP_DIR/backup_prepare_0.log 2>&1 ---exec $XTRABACKUP --prepare --target-dir=$basedir --incremental-dir=$incremental_dir --user=root > $MYSQL_TMP_DIR/backup_prepare_1.log ---cat_file $MYSQL_TMP_DIR/backup_prepare_1.log +--echo # Prepare incremental backup +--exec $XTRABACKUP --prepare --target-dir=$basedir --incremental-dir=$incremental_dir --user=root > $MYSQL_TMP_DIR/backup_prepare_1.log 2>&1 let $targetdir=$basedir; -- source include/restart_and_restore.inc - SHOW CREATE TABLE t; DROP TABLE t; +remove_file $incremental_dir.log; +remove_file $MYSQL_TMP_DIR/backup_prepare_0.log; +remove_file $MYSQL_TMP_DIR/backup_prepare_1.log; +rmdir $basedir; +rmdir $incremental_dir; diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/log_page_corruption.test mariadb-10.11.13/mysql-test/suite/mariabackup/log_page_corruption.test --- mariadb-10.11.11/mysql-test/suite/mariabackup/log_page_corruption.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/log_page_corruption.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,5 +1,5 @@ +--source include/long_test.inc --source include/have_debug.inc ---source include/no_valgrind_without_big.inc --source include/innodb_undo_tablespaces.inc --echo ######## diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/partial.result mariadb-10.11.13/mysql-test/suite/mariabackup/partial.result --- mariadb-10.11.11/mysql-test/suite/mariabackup/partial.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/partial.result 2025-05-19 16:14:25.000000000 +0000 @@ -4,8 +4,8 @@ INSERT INTO t21 VALUES(1); CREATE TABLE t2(i int) ENGINE INNODB; # xtrabackup backup -t1.new -t21.new +t1.ibd +t21.ibd # xtrabackup prepare t1.cfg t21.cfg diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/partial_exclude.result mariadb-10.11.13/mysql-test/suite/mariabackup/partial_exclude.result --- mariadb-10.11.11/mysql-test/suite/mariabackup/partial_exclude.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/partial_exclude.result 2025-05-19 16:14:25.000000000 +0000 @@ -14,7 +14,7 @@ INSERT INTO test.t2 VALUES(20); # xtrabackup backup COMMIT; -t1.new +t1.ibd DROP TABLE t1; DROP TABLE t2; DROP DATABASE db2; diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/partition_notwin.result mariadb-10.11.13/mysql-test/suite/mariabackup/partition_notwin.result --- mariadb-10.11.11/mysql-test/suite/mariabackup/partition_notwin.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/partition_notwin.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,11 @@ +# +# MDEV-36437 mariabackup - confusing error message when running out of file handles with partitioned MyISAM +# +create table t1 ( +id bigint(20) not null auto_increment, +primary key (id) +) engine=myisam +partition by hash (id) +partitions 600; +FOUND 1 /Error 24 on file ./test/t1#P#p\d+\.MY[DI] open during `test`.`t1` table copy: Too many open files/ in backup.log +drop table t1; diff -Nru 
mariadb-10.11.11/mysql-test/suite/mariabackup/partition_notwin.test mariadb-10.11.13/mysql-test/suite/mariabackup/partition_notwin.test --- mariadb-10.11.11/mysql-test/suite/mariabackup/partition_notwin.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/partition_notwin.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,25 @@ +source include/not_windows.inc; +source include/have_partition.inc; +let $targetdir=$MYSQLTEST_VARDIR/tmp/backup; +let $log=$MYSQL_TMP_DIR/backup.log; + +--echo # +--echo # MDEV-36437 mariabackup - confusing error message when running out of file handles with partitioned MyISAM +--echo # + +create table t1 ( + id bigint(20) not null auto_increment, + primary key (id) +) engine=myisam + partition by hash (id) + partitions 600; + +error 1; +exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir > $log 2>&1; +let SEARCH_FILE=$log; +let SEARCH_PATTERN=Error 24 on file ./test/t1#P#p\d+\.MY[DI] open during `test`.`t1` table copy: Too many open files; +source include/search_pattern_in_file.inc; + +rmdir $targetdir; +#remove_file $log; +drop table t1; diff -Nru mariadb-10.11.11/mysql-test/suite/mariabackup/unsupported_redo.result mariadb-10.11.13/mysql-test/suite/mariabackup/unsupported_redo.result --- mariadb-10.11.11/mysql-test/suite/mariabackup/unsupported_redo.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/mariabackup/unsupported_redo.result 2025-05-19 16:14:25.000000000 +0000 @@ -22,8 +22,8 @@ ALTER TABLE t21 FORCE, ALGORITHM=INPLACE; # Create partial backup (excluding table t21), Ignore the # unsupported redo log for the table t21. -t1.new -t2.new +t1.ibd +t2.ibd # Prepare the full backup t1.ibd t2.ibd diff -Nru mariadb-10.11.11/mysql-test/suite/multi_source/master_info_file.opt mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.opt --- mariadb-10.11.11/mysql-test/suite/multi_source/master_info_file.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.opt 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1 @@ +--master-info-file=$MYSQL_TMP_DIR/master_info_file.txt diff -Nru mariadb-10.11.11/mysql-test/suite/multi_source/master_info_file.result mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.result --- mariadb-10.11.11/mysql-test/suite/multi_source/master_info_file.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,18 @@ +CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_port=SERVER_MYPORT_1; +CHANGE MASTER 'named' TO master_host='localhost', master_user='test', master_port=SERVER_MYPORT_2; +--list_files @@datadir *.info +relay-log-named.info +relay-log.info +--list_files MYSQL_TMP_DIR *.txt +master_info_file-named.txt +master_info_file.txt +multi-master_info_file.txt +--cat_file MYSQL_TMP_DIR/multi-master_info_file.txt +named +FOUND 1 matches in master_info_file.txt +FOUND 1 matches in master_info_file.txt +FOUND 1 matches in master_info_file.txt +FOUND 1 matches in master_info_file-named.txt +FOUND 1 matches in master_info_file-named.txt +FOUND 1 matches in master_info_file-named.txt +RESET REPLICA 'named' ALL; diff -Nru mariadb-10.11.11/mysql-test/suite/multi_source/master_info_file.test mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.test --- mariadb-10.11.11/mysql-test/suite/multi_source/master_info_file.test 1970-01-01 
00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/multi_source/master_info_file.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,38 @@ +# MDEV-36238: Test `--master-info-file` +# +# Other tests (such as `info_logs`) work explicitly with `(multi-)master.info`. +# This test sees that `--master-info-file` moves/renames this file. + +--source include/not_embedded.inc +--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1 +--eval CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_port=$SERVER_MYPORT_1 +--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2 +--eval CHANGE MASTER 'named' TO master_host='localhost', master_user='test', master_port=$SERVER_MYPORT_2 + +--let $datadir = `SELECT @@datadir` +--echo --list_files @@datadir *.info +--list_files $datadir *.info +--echo --list_files MYSQL_TMP_DIR *.txt +--list_files $MYSQL_TMP_DIR *.txt + +--echo --cat_file MYSQL_TMP_DIR/multi-master_info_file.txt +--cat_file $MYSQL_TMP_DIR/multi-master_info_file.txt +--let SEARCH_OUTPUT= count + +--let SEARCH_FILE= $MYSQL_TMP_DIR/master_info_file.txt +--let SEARCH_PATTERN= \\n127.0.0.1\\n +--source include/search_pattern_in_file.inc +--let SEARCH_PATTERN= \\nroot\\n +--source include/search_pattern_in_file.inc +--let SEARCH_PATTERN= \\n$SERVER_MYPORT_1\\n +--source include/search_pattern_in_file.inc + +--let SEARCH_FILE= $MYSQL_TMP_DIR/master_info_file-named.txt +--let SEARCH_PATTERN= \\nlocalhost\\n +--source include/search_pattern_in_file.inc +--let SEARCH_PATTERN= \\ntest\\n +--source include/search_pattern_in_file.inc +--let SEARCH_PATTERN= \\n$SERVER_MYPORT_2\\n +--source include/search_pattern_in_file.inc + +RESET REPLICA 'named' ALL; diff -Nru mariadb-10.11.11/mysql-test/suite/multi_source/show_slave_auth_info.cnf mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.cnf --- mariadb-10.11.11/mysql-test/suite/multi_source/show_slave_auth_info.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.cnf 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,13 @@ +!include ./my.cnf + +[mysqld.1] +show-slave-auth-info + +[mysqld.4] +server-id=4 +log-warnings=2 +report-user=my_user +report-password=my_password + +[ENV] +SERVER_MYPORT_4= @mysqld.4.port diff -Nru mariadb-10.11.11/mysql-test/suite/multi_source/show_slave_auth_info.result mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.result --- mariadb-10.11.11/mysql-test/suite/multi_source/show_slave_auth_info.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,45 @@ +# Setup +connect master1,127.0.0.1,root,,,$SERVER_MYPORT_1; +connect master2,127.0.0.1,root,,,$SERVER_MYPORT_2; +connect slave1,127.0.0.1,root,,,$SERVER_MYPORT_3; +connect slave2,127.0.0.1,root,,,$SERVER_MYPORT_4; +connection slave2; +CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_port=SERVER_MYPORT_1; +CHANGE MASTER 'control sample' TO master_host='127.0.0.1', master_user='root', master_port=SERVER_MYPORT_2; +START ALL SLAVES; +connection slave1; +CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_port=SERVER_MYPORT_1; +CHANGE MASTER 'control sample' TO master_host='127.0.0.1', master_user='root', master_port=SERVER_MYPORT_2; +START ALL SLAVES; +# Test +connection master2; +SHOW SLAVE HOSTS; +Server_id Host Port Master_id +3 localhost SERVER_MYPORT_3 2 +4 localhost SERVER_MYPORT_4 2 +connection master1; +SHOW SLAVE HOSTS; 
+Server_id Host User Password Port Master_id +3 localhost SERVER_MYPORT_3 1 +4 localhost my_user my_password SERVER_MYPORT_4 1 +SHOW REPLICA HOSTS; +Server_id Host User Password Port Master_id +3 localhost SERVER_MYPORT_3 1 +4 localhost my_user my_password SERVER_MYPORT_4 1 +# Cleanup +connection slave2; +STOP ALL SLAVES; +include/wait_for_slave_to_stop.inc +SET @@SESSION.default_master_connection= 'control sample'; +include/wait_for_slave_to_stop.inc +RESET SLAVE ALL; +connection slave1; +STOP ALL SLAVES; +include/wait_for_slave_to_stop.inc +SET @@SESSION.default_master_connection= 'control sample'; +include/wait_for_slave_to_stop.inc +RESET SLAVE ALL; +disconnect master1; +disconnect master2; +disconnect slave1; +disconnect slave2; diff -Nru mariadb-10.11.11/mysql-test/suite/multi_source/show_slave_auth_info.test mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.test --- mariadb-10.11.11/mysql-test/suite/multi_source/show_slave_auth_info.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/multi_source/show_slave_auth_info.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,83 @@ +# MDEV-36238: Test `--show-slave-auth-info` (and `--report-user`/`password`) +# +# `rpl.rpl_show_slave_hosts` and `rpl.rpl_slave_alias_replica` +# (and several others) test SHOW SLAVE HOSTS without `--show-slave-auth-info`. +# This test supplements them with a comparison between with and without. + +# SHOW SLAVE HOSTS is agnostic of binlog formats +--source include/have_binlog_format_mixed.inc + +--echo # Setup + +# This server has `--show-slave-auth-info`. +--connect (master1,127.0.0.1,root,,,$SERVER_MYPORT_1) +# This `--show-slave-auth-info`-less server asserts that it is per-master. +--connect (master2,127.0.0.1,root,,,$SERVER_MYPORT_2) +# This is a non-reporting slave. +--connect (slave1,127.0.0.1,root,,,$SERVER_MYPORT_3) +# This is a self-reporting slave. 
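Note: the User and Password columns in the SHOW SLAVE HOSTS output above are only populated when the master runs with --show-slave-auth-info and the slave self-reports credentials. A minimal sketch of that pairing, reusing the option values from the .cnf fragment above; the port and server id are illustrative:

  # master (mysqld.1): started with --show-slave-auth-info
  # slave  (mysqld.4): started with --report-user=my_user --report-password=my_password
  SHOW SLAVE HOSTS;
  # on such a master the self-reporting slave lists as:
  #   Server_id  Host       User     Password     Port    Master_id
  #   4          localhost  my_user  my_password  <port>  1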
+--connect (slave2,127.0.0.1,root,,,$SERVER_MYPORT_4) + +--let $rpl_server_number= 2 +while ($rpl_server_number) +{ + --connection slave$rpl_server_number + + --replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1 + --eval CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_port=$SERVER_MYPORT_1 + --replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2 + --eval CHANGE MASTER 'control sample' TO master_host='127.0.0.1', master_user='root', master_port=$SERVER_MYPORT_2 + --disable_warnings + START ALL SLAVES; + --enable_warnings + + --dec $rpl_server_number +} + +--echo # Test + +--let $rpl_server_number= 2 +while ($rpl_server_number) +{ + --connection master$rpl_server_number + + # Make sure the master's synced up + --let $show_statement= SHOW SLAVE HOSTS + --let $field= Server_id + --let $condition= =3 + --source include/wait_show_condition.inc + --let $condition= =4 + --source include/wait_show_condition.inc + + --replace_result $SERVER_MYPORT_3 SERVER_MYPORT_3 $SERVER_MYPORT_4 SERVER_MYPORT_4 + SHOW SLAVE HOSTS; + + --dec $rpl_server_number +} + +# MDEV-20601 Make REPLICA a synonym for SLAVE in SQL statements +--replace_result $SERVER_MYPORT_3 SERVER_MYPORT_3 $SERVER_MYPORT_4 SERVER_MYPORT_4 +SHOW REPLICA HOSTS; + +--echo # Cleanup + +--let $rpl_server_number= 2 +while ($rpl_server_number) +{ + --connection slave$rpl_server_number + + --disable_warnings + STOP ALL SLAVES; + --enable_warnings + --source include/wait_for_slave_to_stop.inc + SET @@SESSION.default_master_connection= 'control sample'; + --source include/wait_for_slave_to_stop.inc + RESET SLAVE ALL; + + --dec $rpl_server_number +} + +--disconnect master1 +--disconnect master2 +--disconnect slave1 +--disconnect slave2 diff -Nru mariadb-10.11.11/mysql-test/suite/parts/t/partition_exchange_innodb.test mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_innodb.test --- mariadb-10.11.11/mysql-test/suite/parts/t/partition_exchange_innodb.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_innodb.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_innodb.inc --source include/have_partition.inc --source include/have_debug_sync.inc diff -Nru mariadb-10.11.11/mysql-test/suite/parts/t/partition_exchange_memory.test mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_memory.test --- mariadb-10.11.11/mysql-test/suite/parts/t/partition_exchange_memory.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_memory.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_partition.inc --source include/have_debug_sync.inc diff -Nru mariadb-10.11.11/mysql-test/suite/parts/t/partition_exchange_myisam.test mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_myisam.test --- mariadb-10.11.11/mysql-test/suite/parts/t/partition_exchange_myisam.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/parts/t/partition_exchange_myisam.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_partition.inc --source include/have_debug_sync.inc diff -Nru mariadb-10.11.11/mysql-test/suite/perfschema/r/threads_innodb.result mariadb-10.11.13/mysql-test/suite/perfschema/r/threads_innodb.result --- mariadb-10.11.11/mysql-test/suite/perfschema/r/threads_innodb.result 2025-01-30 11:01:24.000000000 +0000 +++ 
mariadb-10.11.13/mysql-test/suite/perfschema/r/threads_innodb.result 2025-05-19 16:14:25.000000000 +0000 @@ -1,10 +1,10 @@ SELECT name, type, processlist_user, processlist_host, processlist_db, -processlist_command, processlist_time, processlist_state, processlist_info, +processlist_command, processlist_time, processlist_info, parent_thread_id, role, instrumented FROM performance_schema.threads WHERE name LIKE 'thread/innodb/%' GROUP BY name; -name type processlist_user processlist_host processlist_db processlist_command processlist_time processlist_state processlist_info parent_thread_id role instrumented -thread/innodb/page_cleaner_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES -thread/innodb/page_encrypt_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES -thread/innodb/thread_pool_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL NULL YES +name type processlist_user processlist_host processlist_db processlist_command processlist_time processlist_info parent_thread_id role instrumented +thread/innodb/page_cleaner_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL YES +thread/innodb/page_encrypt_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL YES +thread/innodb/thread_pool_thread BACKGROUND NULL NULL NULL NULL NULL NULL NULL NULL YES diff -Nru mariadb-10.11.11/mysql-test/suite/perfschema/t/threads_innodb.test mariadb-10.11.13/mysql-test/suite/perfschema/t/threads_innodb.test --- mariadb-10.11.11/mysql-test/suite/perfschema/t/threads_innodb.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/perfschema/t/threads_innodb.test 2025-05-19 16:14:25.000000000 +0000 @@ -14,7 +14,7 @@ # We suppress here duplicates rows with the goal to avoid that the test fails # in case some defaults are changed. SELECT name, type, processlist_user, processlist_host, processlist_db, - processlist_command, processlist_time, processlist_state, processlist_info, + processlist_command, processlist_time, processlist_info, parent_thread_id, role, instrumented FROM performance_schema.threads WHERE name LIKE 'thread/innodb/%' diff -Nru mariadb-10.11.11/mysql-test/suite/plugins/r/server_audit.result mariadb-10.11.13/mysql-test/suite/plugins/r/server_audit.result --- mariadb-10.11.11/mysql-test/suite/plugins/r/server_audit.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/plugins/r/server_audit.result 2025-05-19 16:14:25.000000000 +0000 @@ -20,6 +20,9 @@ set global server_audit_incl_users=null; set global server_audit_file_path='server_audit.log'; set global server_audit_output_type=file; +set global server_audit_file_path=REPEAT(REPEAT('new_file_name', 50), 50); +Warnings: +Warning 1 server_audit_file_path can't exceed FN_LEN characters. set global server_audit_logging=on; set global server_audit_incl_users= repeat("'root',", 10000); ERROR 42000: Variable 'server_audit_incl_users' can't be set to the value of ''root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','root','...' 
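Note: the REPEAT(REPEAT('new_file_name', 50), 50) value above is 32500 characters, far beyond any file-name limit, so the plugin now emits a warning instead of accepting the value silently. A minimal reproduction sketch, assuming the server_audit plugin is already installed and FN_LEN is the server's file-name length limit:

  set global server_audit_output_type=file;
  set global server_audit_file_path=REPEAT(REPEAT('new_file_name', 50), 50);
  show warnings;
  # Warning 1 server_audit_file_path can't exceed FN_LEN characters.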
diff -Nru mariadb-10.11.11/mysql-test/suite/plugins/t/server_audit.test mariadb-10.11.13/mysql-test/suite/plugins/t/server_audit.test --- mariadb-10.11.11/mysql-test/suite/plugins/t/server_audit.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/plugins/t/server_audit.test 2025-05-19 16:14:25.000000000 +0000 @@ -20,6 +20,10 @@ set global server_audit_incl_users=null; set global server_audit_file_path='server_audit.log'; set global server_audit_output_type=file; + +--replace_regex /[1-9][0-9][0-9]+/FN_LEN/ +set global server_audit_file_path=REPEAT(REPEAT('new_file_name', 50), 50); + set global server_audit_logging=on; --error ER_WRONG_VALUE_FOR_VAR diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result mariadb-10.11.13/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result --- mariadb-10.11.11/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/parallel_backup_xa_debug.result 2025-05-19 16:14:25.000000000 +0000 @@ -4,6 +4,7 @@ CREATE TABLE t (a INT) ENGINE = innodb; connection slave; include/stop_slave.inc +SET STATEMENT sql_log_bin= 0 FOR ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads; SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode; SET @@global.slave_parallel_threads= 2; @@ -19,6 +20,7 @@ connection slave; SET @@global.debug_dbug="+d,hold_worker_on_schedule"; start slave; +SET debug_sync = 'now WAIT_FOR reached_pause'; connection slave1; backup stage start; backup stage block_commit; diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_create_select_row.result mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_create_select_row.result --- mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_create_select_row.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_create_select_row.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,158 @@ +include/master-slave.inc +[connection master] +connection master; +set @max_binlog_cache_size = @@global.max_binlog_cache_size; +set @binlog_cache_size = @@global.binlog_cache_size; +set @@global.max_binlog_cache_size = 4096; +set @@global. 
binlog_cache_size = 4096; +# +# MDEV-35207 ignored error at binlogging by CREATE-TABLE-SELECT leads to assert +# +connect conn_err,localhost,root,,; +call mtr.add_suppression("Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage"); +create table t engine=myisam select repeat ('a',4096*3) AS a; +ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mariadbd variable and try again +create table t engine=innodb select repeat ('a',4096*3) AS a; +ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mariadbd variable and try again +create table t (a int unique, b char) select 1 AS a, 'b' as b union select 1 as a, 'c' as b; +ERROR 23000: Duplicate entry '1' for key 'a' +select * from t; +ERROR 42S02: Table 'test.t' doesn't exist +disconnect conn_err; +connection master; + +# +# MDEV-35499 errored CREATE-OR-REPLACE-SELECT does not DROP table in binlog +# +# +# Engine = innodb +# +set statement binlog_format=statement for create table t (a int) select 1 as a; +set statement binlog_format=row for create or replace table t (a int primary key, b char) engine=innodb select 1 AS a, 'b' as b union select 1 as a, 'c' as b; +ERROR 23000: Duplicate entry '1' for key 'PRIMARY' +select * from t; +ERROR 42S02: Table 'test.t' doesn't exist +# +# Prove an expected lonely `DROP table t' +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; DROP TABLE IF EXISTS `test`.`t`/* Generated to handle failed CREATE OR REPLACE */ +master-bin.000001 # Query # # ROLLBACK +set statement binlog_format=statement for create table t (a int) select 1 as a; +set statement binlog_format=row for create or replace table t (a text) engine=innodb select repeat ('a',1024) AS a union select repeat ('a',3*4096) AS a union select repeat ('a',3*4096) AS a; +ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mariadbd variable and try again +select * from t; +ERROR 42S02: Table 'test.t' doesn't exist +# +# Prove an expected lonely `DROP table t' +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; DROP TABLE IF EXISTS `test`.`t`/* Generated to handle failed CREATE OR REPLACE */ +master-bin.000001 # Query # # ROLLBACK +set statement binlog_format=statement for create table t (a int) select 1 as a; +set statement binlog_format=row for create or replace table t (a text) engine=innodb select repeat ('a',4096*3) AS a;; +ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mariadbd variable and try again +select * from t; +ERROR 42S02: Table 'test.t' doesn't exist +# +# Prove an expected lonely `DROP table t' +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; DROP TABLE IF EXISTS `test`.`t`/* Generated to handle failed CREATE OR REPLACE */ +master-bin.000001 # Query # # ROLLBACK +# +# Engine = myisam +# +set statement binlog_format=statement for create table t (a int) select 1 as a; +set statement binlog_format=row for create or replace table t (a int primary key, b char) engine=myisam select 1 AS a, 'b' as b 
union select 1 as a, 'c' as b; +ERROR 23000: Duplicate entry '1' for key 'PRIMARY' +select * from t; +ERROR 42S02: Table 'test.t' doesn't exist +# +# Prove an expected lonely `DROP table t' +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; DROP TABLE IF EXISTS `test`.`t`/* Generated to handle failed CREATE OR REPLACE */ +master-bin.000001 # Query # # ROLLBACK +set statement binlog_format=statement for create table t (a int) select 1 as a; +set statement binlog_format=row for create or replace table t (a text) engine=myisam select repeat ('a',1024) AS a union select repeat ('a',3*4096) AS a union select repeat ('a',3*4096) AS a; +ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mariadbd variable and try again +select * from t; +ERROR 42S02: Table 'test.t' doesn't exist +# +# Prove an expected lonely `DROP table t' +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; DROP TABLE IF EXISTS `test`.`t`/* Generated to handle failed CREATE OR REPLACE */ +master-bin.000001 # Query # # ROLLBACK +set statement binlog_format=statement for create table t (a int) select 1 as a; +set statement binlog_format=row for create or replace table t (a text) engine=myisam select repeat ('a',4096*3) AS a;; +ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mariadbd variable and try again +select * from t; +ERROR 42S02: Table 'test.t' doesn't exist +# +# Prove an expected lonely `DROP table t' +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; DROP TABLE IF EXISTS `test`.`t`/* Generated to handle failed CREATE OR REPLACE */ +master-bin.000001 # Query # # ROLLBACK +create table ti_pk (a int primary key) engine=innodb; +create table ta (a int) engine=aria; +create function f_ia(arg int) +returns integer +begin +insert into ti_pk set a=1; +insert into ta set a=1; +insert into ti_pk set a=arg; +return 1; +end | +set statement binlog_format = ROW for create table t_y (a int) engine=aria select f_ia(1 /* err */) as a; +ERROR 23000: Duplicate entry '1' for key 'PRIMARY' +select * from t_y; +ERROR 42S02: Table 'test.t_y' doesn't exist +# correct execution: `ta` is modified and its new record is binlogged +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Table_map # # table_id: # (test.ta) +master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F +master-bin.000001 # Query # # COMMIT +select * from ta; +a +1 +select * from ti_pk; +a +connection slave; +include/diff_tables.inc [master:ta,slave:ta] +connection master; +delete from ta; +connection slave; +connection master; +set statement binlog_format = STATEMENT for create table t_y (a int) engine=aria select f_ia(1 /* err */) as a; +ERROR 23000: Duplicate entry '1' for key 'PRIMARY' +select * from t_y; +ERROR 42S02: Table 'test.t_y' doesn't exist +# ***TODO: fix MDEV-36027***. 
As of now `ta` is modified but that's not binlogged +include/show_binlog_events.inc +select *,'on_master' from ta; +a on_master +1 on_master +select * from ti_pk; +a +connection slave; +select *,'on_slave' from ta; +a on_slave +connection master; +drop function f_ia; +drop table ti_pk, ta; +SET @@global.max_binlog_cache_size = @max_binlog_cache_size; +SET @@global. binlog_cache_size = @binlog_cache_size; +connection slave; +End of the tests +include/rpl_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_gtid_crash.result mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_gtid_crash.result --- mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_gtid_crash.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_gtid_crash.result 2025-05-19 16:14:25.000000000 +0000 @@ -12,6 +12,8 @@ connection server_2; SET sql_log_bin=0; call mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); +call mtr.add_suppression('Slave I/O: .*Lost connection to server during query'); +call mtr.add_suppression("Slave I/O thread couldn't register on master"); SET sql_log_bin=1; include/stop_slave.inc CHANGE MASTER TO master_host = '127.0.0.1', master_port = MASTER_PORT, diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_master_pos_wait.result mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_master_pos_wait.result --- mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_master_pos_wait.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_master_pos_wait.result 2025-05-19 16:14:25.000000000 +0000 @@ -43,6 +43,9 @@ select master_pos_wait('master-bin.000001',1000000,1,"my_slave"); master_pos_wait('master-bin.000001',1000000,1,"my_slave") -1 +select master_pos_wait('master-bin.000001',1000000,1,"MY_SLAVE"); +master_pos_wait('master-bin.000001',1000000,1,"MY_SLAVE") +-1 STOP SLAVE 'my_slave'; RESET SLAVE 'my_slave' ALL; change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root'; diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_parallel_innodb_lock_conflict.result mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_parallel_innodb_lock_conflict.result --- mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_parallel_innodb_lock_conflict.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_parallel_innodb_lock_conflict.result 2025-05-19 16:14:25.000000000 +0000 @@ -1,16 +1,15 @@ ***MDEV-5914: Parallel replication deadlock due to InnoDB lock conflicts *** include/master-slave.inc [connection master] -connection server_2; -SET sql_log_bin=0; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CALL mtr.add_suppression("InnoDB: Transaction was aborted due to "); CALL mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); -SET sql_log_bin=1; +connection server_2; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; include/stop_slave.inc SET GLOBAL slave_parallel_threads=10; CHANGE MASTER TO master_use_gtid=slave_pos; connection server_1; -ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); connect con1,127.0.0.1,root,,test,$SERVER_MYPORT_1,; diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_semi_sync_master_disable_with_slave.result mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_semi_sync_master_disable_with_slave.result --- 
mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_semi_sync_master_disable_with_slave.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_semi_sync_master_disable_with_slave.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,41 @@ +# Set up Semi-Sync with rpl_semi_sync_master_wait_no_slave=0 +include/master-slave.inc +[connection master] +SET @@GLOBAL.rpl_semi_sync_master_enabled= 1; +SET @@GLOBAL.rpl_semi_sync_master_wait_no_slave= 0; +connection slave; +SET @@GLOBAL.rpl_semi_sync_slave_enabled= 1; +include/start_slave.inc +connection master; +connection slave; +connection master; +SELECT ID INTO @binlog_dump_tid +FROM information_schema.PROCESSLIST WHERE COMMAND = 'Binlog Dump'; +# Control State +SELECT STATE FROM information_schema.PROCESSLIST WHERE ID = @binlog_dump_tid; +STATE +Master has sent all binlog to slave; waiting for more updates +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; +Variable_name Value +Rpl_semi_sync_master_clients 1 +# Disable Semi-Sync while the dump thread is still connected to its slave +SET @@GLOBAL.rpl_semi_sync_master_enabled = 0; +SELECT STATE FROM information_schema.PROCESSLIST WHERE ID = @binlog_dump_tid; +STATE +Master has sent all binlog to slave; waiting for more updates +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; +Variable_name Value +Rpl_semi_sync_master_clients 1 +# Disconnect the slave and wait until the master's dump thread is gone +connection slave; +STOP SLAVE; +connection master; +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; +Variable_name Value +Rpl_semi_sync_master_clients 0 +# Cleanup +SET @@GLOBAL.rpl_semi_sync_master_enabled= 0; +SET @@GLOBAL.rpl_semi_sync_master_wait_no_slave= 1; +connection slave; +SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0; +include/rpl_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_semi_sync_ssl_stop.result mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_semi_sync_ssl_stop.result --- mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_semi_sync_ssl_stop.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_semi_sync_ssl_stop.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,53 @@ +# Skip starting the slave because we manually start with SSL later +include/master-slave.inc +[connection master] +# +# Setup +connection master; +CREATE USER replssl@localhost; +GRANT REPLICATION SLAVE on *.* to replssl@localhost REQUIRE SSL; +set @orig_master_enabled= @@GLOBAL.rpl_semi_sync_master_enabled; +SET @@GLOBAL.rpl_semi_sync_master_enabled= 1; +connection slave; +CHANGE MASTER TO +master_user='replssl', +master_password='', +master_ssl=1, +master_ssl_ca='MYSQL_TEST_DIR/std_data/cacert.pem', +master_ssl_cert='MYSQL_TEST_DIR/std_data/client-cert.pem', +master_ssl_key='MYSQL_TEST_DIR/std_data/client-key.pem'; +set @orig_slave_enabled= @@GLOBAL.rpl_semi_sync_slave_enabled; +SET @@GLOBAL.rpl_semi_sync_slave_enabled= 1; +include/start_slave.inc +connection master; +# Verify Semi-Sync is active +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; +Variable_name Value +Rpl_semi_sync_master_clients 1 +# Create some table so slave can be seen as up-to-date and working +connection master; +CREATE TABLE t1 (a INT); +connection slave; +# Disconnect the slave and wait until the master's dump thread is gone +connection slave; +STOP SLAVE; +connection master; +# MDEV-36663: Verifying dump thread connection is killed.. 
+# ..done +# Cleanup +connection master; +SET @@GLOBAL.rpl_semi_sync_master_enabled= @orig_master_enabled; +DROP USER replssl@localhost; +DROP TABLE t1; +connection slave; +SET @@GLOBAL.rpl_semi_sync_slave_enabled= @orig_slave_enabled; +CHANGE MASTER TO +master_user='root', +master_ssl=0, +master_ssl_ca='', +master_ssl_cert='', +master_ssl_key=''; +connection slave; +include/start_slave.inc +include/rpl_end.inc +# End of rpl_semi_sync_ssl_stop.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_xa_2pc_multi_engine.result mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_xa_2pc_multi_engine.result --- mariadb-10.11.11/mysql-test/suite/rpl/r/rpl_xa_2pc_multi_engine.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/r/rpl_xa_2pc_multi_engine.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,26 @@ +include/master-slave.inc +[connection master] +connection master; +create table t1 (a int primary key, b int) engine=innodb; +insert t1 values (1,1),(3,3),(5,5),(7,7); +create table t2 (m int) engine=aria; +# Create multi-engine, two-phase XA transaction (T1) +xa start '1'; +insert t2 values (1); +update t1 set b=50 where b=5; +xa end '1'; +xa prepare '1'; +# Create T2 +connection server_1; +update t1 set b=10 where a=5; +connection master; +xa commit '1'; +connection server_1; +include/save_master_gtid.inc +# This would hang prior to MDEV-21117 +connection slave; +include/sync_with_master_gtid.inc +connection master; +drop table t1, t2; +include/rpl_end.inc +# End of rpl_xa_2pc_multi_engine.test diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test mariadb-10.11.13/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/parallel_backup_xa_debug.test 2025-05-19 16:14:25.000000000 +0000 @@ -11,6 +11,7 @@ --sync_slave_with_master --source include/stop_slave.inc +SET STATEMENT sql_log_bin= 0 FOR ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads; SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode; SET @@global.slave_parallel_threads= 2; @@ -28,20 +29,21 @@ --connection slave SET @@global.debug_dbug="+d,hold_worker_on_schedule"; start slave; +SET debug_sync = 'now WAIT_FOR reached_pause'; --let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit" --source include/wait_condition.inc --connection slave1 backup stage start; ---send backup stage block_commit +backup stage block_commit; --connection slave --let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for backup lock" SET debug_sync = 'now SIGNAL continue_worker'; +--source include/wait_condition.inc SET debug_sync = RESET; --connection slave1 -reap; backup stage end; --connection master diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_create_select_row.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_create_select_row.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_create_select_row.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_create_select_row.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,161 @@ +--source include/have_binlog_format_row.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--connection master +set @max_binlog_cache_size = @@global.max_binlog_cache_size; 
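Note: the 4096-byte cache settings at the start of this test are what force ER_TRANS_CACHE_FULL throughout. The mechanism in brief, as a sketch (values and statements taken from the listing that follows):

  set @@global.max_binlog_cache_size = 4096;
  set @@global.binlog_cache_size = 4096;
  # any CREATE ... SELECT producing more than 4K of row events now fails:
  create table t engine=innodb select repeat('a',4096*3) AS a;
  # ERROR HY000: Multi-statement transaction required more than
  # 'max_binlog_cache_size' bytes of storage
  # and the failed statement must leave neither a table t nor binlog events behind.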
+set @binlog_cache_size = @@global.binlog_cache_size; +set @@global.max_binlog_cache_size = 4096; +set @@global. binlog_cache_size = 4096; + +--echo # +--echo # MDEV-35207 ignored error at binlogging by CREATE-TABLE-SELECT leads to assert +--echo # +# fix the current (write) binlog position +--let $binlog_file_0= query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_start_0 = query_get_value(SHOW MASTER STATUS, Position, 1) + +# use a separate connection also to validate its close will be clean +connect (conn_err,localhost,root,,); + +call mtr.add_suppression("Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage"); +--error ER_TRANS_CACHE_FULL +create table t engine=myisam select repeat ('a',4096*3) AS a; + +--error ER_TRANS_CACHE_FULL +create table t engine=innodb select repeat ('a',4096*3) AS a; + +--error ER_DUP_ENTRY +create table t (a int unique, b char) select 1 AS a, 'b' as b union select 1 as a, 'c' as b; +--error ER_NO_SUCH_TABLE +select * from t; + +--disconnect conn_err + +--connection master +--let $binlog_file_1= query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_start_1= query_get_value(SHOW MASTER STATUS, Position, 1) + +--let $cmp = `select strcmp('$binlog_file_1', '$binlog_file_0') <> 0 OR $binlog_start_1 <> $binlog_start_0` +if (!$cmp) +{ + --echo *** Error: unexpected advance of binlog position + --die +} + +--echo +--echo # +--echo # MDEV-35499 errored CREATE-OR-REPLACE-SELECT does not DROP table in binlog +--echo # +--let $i = 2 +while ($i) +{ + --let $engine=`select if($i % 2, "myisam", "innodb")` + --echo # + --echo # Engine = $engine + --echo # + set statement binlog_format=statement for create table t (a int) select 1 as a; + --let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + --let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) + --error ER_DUP_ENTRY + --eval set statement binlog_format=row for create or replace table t (a int primary key, b char) engine=$engine select 1 AS a, 'b' as b union select 1 as a, 'c' as b + --error ER_NO_SUCH_TABLE + select * from t; + --echo # + --echo # Prove an expected lonely `DROP table t' + --source include/show_binlog_events.inc + + # error before stmt commit + set statement binlog_format=statement for create table t (a int) select 1 as a; + --let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + --let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) + --error ER_TRANS_CACHE_FULL + --eval set statement binlog_format=row for create or replace table t (a text) engine=$engine select repeat ('a',1024) AS a union select repeat ('a',3*4096) AS a union select repeat ('a',3*4096) AS a + --error ER_NO_SUCH_TABLE + select * from t; + --echo # + --echo # Prove an expected lonely `DROP table t' + --source include/show_binlog_events.inc + + # error at stmt commit + set statement binlog_format=statement for create table t (a int) select 1 as a; + --let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) + --let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) + --error ER_TRANS_CACHE_FULL + --eval set statement binlog_format=row for create or replace table t (a text) engine=$engine select repeat ('a',4096*3) AS a; + --error ER_NO_SUCH_TABLE + select * from t; + --echo # + --echo # Prove an expected lonely `DROP table t' + --source include/show_binlog_events.inc + +--dec $i +} + +# Tests of mixed engines to demonstrate non-transaction table updates +# are binlogged or otherwise MDEV-36027. 
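Note: f_ia() below mixes a transactional engine (InnoDB table ti_pk) with a non-transactional one (Aria table ta) and then fails on a duplicate key. The InnoDB inserts roll back, but the Aria row cannot be undone, so it must stay visible and be binlogged; with ROW format this works (Table_map plus Write_rows for test.ta, then COMMIT), while with STATEMENT format the Aria change is currently missing from the binlog, which is the open bug MDEV-36027. The post-failure checks, as a sketch using the same table names:

  select * from ta;      # the row written inside f_ia() must survive
  select * from ti_pk;   # empty: the InnoDB half rolled back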
+create table ti_pk (a int primary key) engine=innodb; +create table ta (a int) engine=aria; +delimiter |; +create function f_ia(arg int) +returns integer +begin + insert into ti_pk set a=1; + insert into ta set a=1; + insert into ti_pk set a=arg; + return 1; +end | +delimiter ;| + +--let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) + +--error ER_DUP_ENTRY +set statement binlog_format = ROW for create table t_y (a int) engine=aria select f_ia(1 /* err */) as a; +--error ER_NO_SUCH_TABLE +select * from t_y; + +--echo # correct execution: `ta` is modified and its new record is binlogged +--source include/show_binlog_events.inc +select * from ta; +select * from ti_pk; + +--sync_slave_with_master +--let $diff_tables=master:ta,slave:ta +--source include/diff_tables.inc + +--connection master +delete from ta; +--sync_slave_with_master + +--connection master +# MDEV-36027 Errored-out CREATE-SELECT does not binlog results of any function modifying non-transactional table +--let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1) +--let $binlog_start = query_get_value(SHOW MASTER STATUS, Position, 1) +--error ER_DUP_ENTRY +set statement binlog_format = STATEMENT for create table t_y (a int) engine=aria select f_ia(1 /* err */) as a; +--error ER_NO_SUCH_TABLE +select * from t_y; + +--echo # ***TODO: fix MDEV-36027***. As of now `ta` is modified but that's not binlogged +--source include/show_binlog_events.inc +select *,'on_master' from ta; +select * from ti_pk; + +--sync_slave_with_master +select *,'on_slave' from ta; + +# Cleanup +--connection master +drop function f_ia; +drop table ti_pk, ta; + +SET @@global.max_binlog_cache_size = @max_binlog_cache_size; +SET @@global. 
binlog_cache_size = @binlog_cache_size; + +# test that binlog replicates correctly to slave +# --connection slave +--sync_slave_with_master + +--echo End of the tests +--source include/rpl_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_gtid_crash-slave.opt 2025-05-19 16:14:25.000000000 +0000 @@ -1 +1 @@ ---master-retry-count=100 --slave-net-timeout=10 +--master-retry-count=500 --slave-net-timeout=10 diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_gtid_crash.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_gtid_crash.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_gtid_crash.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_gtid_crash.test 2025-05-19 16:14:25.000000000 +0000 @@ -24,6 +24,8 @@ --sync_with_master SET sql_log_bin=0; call mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); +call mtr.add_suppression('Slave I/O: .*Lost connection to server during query'); +call mtr.add_suppression("Slave I/O thread couldn't register on master"); SET sql_log_bin=1; --source include/stop_slave.inc --replace_result $MASTER_MYPORT MASTER_PORT diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_heartbeat_basic.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_heartbeat_basic.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_heartbeat_basic.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_heartbeat_basic.test 2025-05-19 16:14:25.000000000 +0000 @@ -9,6 +9,7 @@ # * Various states of master and heartbeat # * Circular replication ############################################################# +--source include/long_test.inc --source include/master-slave.inc # # The test runs long and does not have any specifics to diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_master_pos_wait.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_master_pos_wait.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_master_pos_wait.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_master_pos_wait.test 2025-05-19 16:14:25.000000000 +0000 @@ -48,6 +48,7 @@ --echo # Call with a valid connection name -- hangs before MDEV-7130 fix (expected -1) select master_pos_wait('master-bin.000001',1000000,1,"my_slave"); +select master_pos_wait('master-bin.000001',1000000,1,"MY_SLAVE"); STOP SLAVE 'my_slave'; RESET SLAVE 'my_slave' ALL; diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_parallel_innodb_lock_conflict.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_parallel_innodb_lock_conflict.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_parallel_innodb_lock_conflict.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_parallel_innodb_lock_conflict.test 2025-05-19 16:14:25.000000000 +0000 @@ -5,21 +5,19 @@ --source include/have_debug_sync.inc --source include/master-slave.inc ---disable_query_log -call mtr.add_suppression("InnoDB: Transaction was aborted due to "); ---enable_query_log +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +CALL mtr.add_suppression("InnoDB: Transaction was aborted due to "); +CALL mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); +--save_master_pos --connection server_2 
-SET sql_log_bin=0; -CALL mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends"); -SET sql_log_bin=1; +--sync_with_master SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; --source include/stop_slave.inc SET GLOBAL slave_parallel_threads=10; CHANGE MASTER TO master_use_gtid=slave_pos; --connection server_1 -ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; CREATE TABLE t4 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB; INSERT INTO t4 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6); --connect (con1,127.0.0.1,root,,test,$SERVER_MYPORT_1,) diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_row_drop_create_temp_table.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_row_drop_create_temp_table.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_row_drop_create_temp_table.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_row_drop_create_temp_table.test 2025-05-19 16:14:25.000000000 +0000 @@ -3,6 +3,7 @@ # tables. Specifically when drop temporary tables and create temporary tables # are used. ################################################################################### +--source include/long_test.inc --source include/have_binlog_format_row.inc --source include/have_innodb.inc --source include/master-slave.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync.test 2025-05-19 16:14:25.000000000 +0000 @@ -4,6 +4,7 @@ # Please check all dependent tests after modifying it # +source include/long_test.inc; source include/not_embedded.inc; source include/have_innodb.inc; source include/master-slave.inc; diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_binlog_format_statement.inc set global rpl_semi_sync_master_wait_point=AFTER_SYNC; source rpl_semi_sync.test; diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_row.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_row.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_row.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_after_sync_row.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/long_test.inc --source include/have_binlog_format_row.inc set global rpl_semi_sync_master_wait_point=AFTER_SYNC; source rpl_semi_sync.test; diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_master_disable_with_slave.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_master_disable_with_slave.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_master_disable_with_slave.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_master_disable_with_slave.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,68 @@ +# MDEV-36359: Master crashes when reverting to async after Semi-Sync disabled. 
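Note: this new test distills the MDEV-36359 crash scenario. A sketch of the offending sequence on the master, assuming a semi-sync slave is already connected:

  SET @@GLOBAL.rpl_semi_sync_master_wait_no_slave= 0;
  SET @@GLOBAL.rpl_semi_sync_master_enabled= 1;   # slave connects, dump thread goes semi-sync
  SET @@GLOBAL.rpl_semi_sync_master_enabled= 0;   # revert to async while the dump thread lives
  # slave side: STOP SLAVE;
  # the resulting disconnect used to crash the master; now
  # Rpl_semi_sync_master_clients must simply drop to 0.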
+# +# Assert behavior of turning Semi-Sync off on +# the master when still connected to a slave + +--source include/have_binlog_format_mixed.inc # format-agnostic + +--echo # Set up Semi-Sync with rpl_semi_sync_master_wait_no_slave=0 +--let $rpl_skip_start_slave= 1 +--source include/master-slave.inc + +--let $orig_master_enabled=`SELECT @@GLOBAL.rpl_semi_sync_master_enabled` +SET @@GLOBAL.rpl_semi_sync_master_enabled= 1; +--let $orig_wait_no_slave=`SELECT @@GLOBAL.rpl_semi_sync_master_wait_no_slave` +SET @@GLOBAL.rpl_semi_sync_master_wait_no_slave= 0; + +--connection slave +--let $orig_slave_enabled=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled` +SET @@GLOBAL.rpl_semi_sync_slave_enabled= 1; +--source include/start_slave.inc + +--connection master +# Make sure Semi-Sync is active +--let $status_var= Rpl_semi_sync_master_status +--let $status_var_value= ON +--source include/wait_for_status_var.inc + +--sync_slave_with_master +--connection master + +--disable_cursor_protocol +SELECT ID INTO @binlog_dump_tid + FROM information_schema.PROCESSLIST WHERE COMMAND = 'Binlog Dump'; +--enable_cursor_protocol + +--echo # Control State +SELECT STATE FROM information_schema.PROCESSLIST WHERE ID = @binlog_dump_tid; +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; + +--echo # Disable Semi-Sync while the dump thread is still connected to its slave +SET @@GLOBAL.rpl_semi_sync_master_enabled = 0; +--let $status_var_value= OFF +--source include/wait_for_status_var.inc + +SELECT STATE FROM information_schema.PROCESSLIST WHERE ID = @binlog_dump_tid; +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; + +--echo # Disconnect the slave and wait until the master's dump thread is gone +--connection slave +STOP SLAVE; +# Starting with MDEV-13073, +# Semi-Sync STOP SLAVE also terminates its dump thread on the master. +--connection master + +# MDEV-36359: The disconnection would crash the master and leave the wait with +# error 2013 'Lost connection to server during query' +--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.PROCESSLIST WHERE ID = @binlog_dump_tid +--source include/wait_condition.inc +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; + +--echo # Cleanup +--eval SET @@GLOBAL.rpl_semi_sync_master_enabled= $orig_master_enabled +--eval SET @@GLOBAL.rpl_semi_sync_master_wait_no_slave= $orig_wait_no_slave +--connection slave +--eval SET @@GLOBAL.rpl_semi_sync_slave_enabled= $orig_slave_enabled + +--let $rpl_only_running_threads= 1 +--source include/rpl_end.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_semi_sync_ssl_stop.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,100 @@ +# +# This test verifies that semi-sync setups configured to use SSL can kill +# the replication connection when the IO thread is stopped (e.g. from +# STOP SLAVE). The way it should happen, is that the IO thread creates a new +# connection to the primary which issues KILL on the connection id of the +# replication connection. MDEV-36663 reported an issue where this new +# kill-oriented connection could not connect to a primary when it requires +# connections to use SSL. +# +# This test sets up a semi-sync SSL master-slave topology, and stops the +# slave IO thread. 
It then validates that the connection was killed by using +# the wait_condition.inc utility to wait for the binlog dump thread to die, +# and also validates that the status variable Rpl_semi_sync_master_clients +# reports as 0. +# +# References: +# MDEV-36663: Semi-sync Replica Can't Kill Dump Thread When Using SSL +# +--source include/have_binlog_format_mixed.inc # format-agnostic +--source include/have_ssl_communication.inc + +--echo # Skip starting the slave because we manually start with SSL later +--let $rpl_skip_start_slave= 1 +--source include/master-slave.inc + +--echo # +--echo # Setup +--connection master +CREATE USER replssl@localhost; +GRANT REPLICATION SLAVE on *.* to replssl@localhost REQUIRE SSL; + +set @orig_master_enabled= @@GLOBAL.rpl_semi_sync_master_enabled; +SET @@GLOBAL.rpl_semi_sync_master_enabled= 1; + +--connection slave +--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR +eval CHANGE MASTER TO + master_user='replssl', + master_password='', + master_ssl=1, + master_ssl_ca='$MYSQL_TEST_DIR/std_data/cacert.pem', + master_ssl_cert='$MYSQL_TEST_DIR/std_data/client-cert.pem', + master_ssl_key='$MYSQL_TEST_DIR/std_data/client-key.pem'; + +set @orig_slave_enabled= @@GLOBAL.rpl_semi_sync_slave_enabled; +SET @@GLOBAL.rpl_semi_sync_slave_enabled= 1; + +--source include/start_slave.inc + +--connection master +--echo # Verify Semi-Sync is active +--let $status_var= Rpl_semi_sync_master_clients +--let $status_var_value= 1 +--source include/wait_for_status_var.inc +SHOW STATUS LIKE 'Rpl_semi_sync_master_clients'; + +--echo # Create some table so slave can be seen as up-to-date and working +--connection master +CREATE TABLE t1 (a INT); +--sync_slave_with_master + +--echo # Disconnect the slave and wait until the master's dump thread is gone +--connection slave +STOP SLAVE; +--connection master + +--echo # MDEV-36663: Verifying dump thread connection is killed.. +# Prior to MDEV-36663 fixes, this would time out and +# Rpl_semi_sync_master_clients would remain 1. 
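Note on the mechanism verified here: to interrupt its dump thread, the stopping slave opens a second connection to the primary and kills the replication connection by id; when the primary requires SSL, that helper connection must negotiate SSL too, which is what MDEV-36663 fixed. The manual equivalent, as a sketch; <dump_id> is a placeholder for the dump thread's connection id:

  # on the primary, over an SSL connection if the account requires it:
  SELECT ID FROM information_schema.PROCESSLIST WHERE COMMAND = 'Binlog Dump';
  KILL CONNECTION <dump_id>;   # substitute the id returned above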
+--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.PROCESSLIST WHERE USER = 'replssl' +--source include/wait_condition.inc + +--let $n_master_clients= query_get_value(SHOW STATUS LIKE 'Rpl_semi_sync_master_clients', Value, 1) +if ($n_master_clients) +{ + --echo # Rpl_semi_sync_master_clients: $n_master_clients + --die Semi-sync dump thread connection not killed +} +--echo # ..done + +--echo # Cleanup +--connection master +SET @@GLOBAL.rpl_semi_sync_master_enabled= @orig_master_enabled; +DROP USER replssl@localhost; +DROP TABLE t1; + +--connection slave +SET @@GLOBAL.rpl_semi_sync_slave_enabled= @orig_slave_enabled; +CHANGE MASTER TO + master_user='root', + master_ssl=0, + master_ssl_ca='', + master_ssl_cert='', + master_ssl_key=''; + +--connection slave +--source include/start_slave.inc + +--source include/rpl_end.inc +--echo # End of rpl_semi_sync_ssl_stop.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_typeconv.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_typeconv.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_typeconv.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_typeconv.test 2025-05-19 16:14:25.000000000 +0000 @@ -4,6 +4,7 @@ # Please check all dependent tests after modifying it # +--source include/long_test.inc --source include/have_binlog_format_row.inc --source include/master-slave.inc diff -Nru mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_xa_2pc_multi_engine.test mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_xa_2pc_multi_engine.test --- mariadb-10.11.11/mysql-test/suite/rpl/t/rpl_xa_2pc_multi_engine.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/rpl/t/rpl_xa_2pc_multi_engine.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,63 @@ +# +# This test ensures binlog order is correct for multi-engine, two-phase XA +# transactions. MDEV-26652 exposed a race condition which would allow +# concurrent transactions which modify the same table record to binlog in +# the "opposite" order, i.e. what _should_ be: +# T1 XA PREPARE +# T1 XA COMMIT +# T2 +# +# was binlogged as +# T1 XA PREPARE +# T2 +# T1 XA COMMIT +# +# which would break replication. +# +# Note that the actual fix for this issue was done with MDEV-21117. 
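Note: restated as plain client statements, the ordering invariant this test pins down is the following (session labels are illustrative):

  -- session A (T1):
  XA START '1'; INSERT t2 VALUES (1); UPDATE t1 SET b=50 WHERE b=5; XA END '1';
  XA PREPARE '1';
  -- session B (T2), blocks on T1's row lock:
  UPDATE t1 SET b=10 WHERE a=5;
  -- session A:
  XA COMMIT '1';
  # T2 can only finish after XA COMMIT releases the lock, so the binlog must
  # read PREPARE, COMMIT, then T2; binlogging T2 between the two breaks
  # replication.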
+# +# References: +# MDEV-26652: xa transactions binlogged in wrong order +# MDEV-21117: refine the server binlog-based recovery for semisync +# +source include/have_binlog_format_row.inc; +source include/have_innodb.inc; +source include/master-slave.inc; + +--connection master +create table t1 (a int primary key, b int) engine=innodb; +insert t1 values (1,1),(3,3),(5,5),(7,7); +create table t2 (m int) engine=aria; + + +--echo # Create multi-engine, two-phase XA transaction (T1) +xa start '1'; +insert t2 values (1); +update t1 set b=50 where b=5; +xa end '1'; + +# Aria doesn't support XA PREPARE, so disable warnings +--disable_warnings +xa prepare '1'; +--enable_warnings + +--echo # Create T2 +--connection server_1 +--send update t1 set b=10 where a=5 + +--connection master +xa commit '1'; + +--connection server_1 +--reap +--source include/save_master_gtid.inc + +--echo # This would hang prior to MDEV-21117 +--connection slave +--source include/sync_with_master_gtid.inc + +--connection master +drop table t1, t2; + +--source include/rpl_end.inc +--echo # End of rpl_xa_2pc_multi_engine.test diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/alter.opt mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.opt --- mariadb-10.11.11/mysql-test/suite/sql_sequence/alter.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.opt 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1 @@ +--innodb-sys-tables diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/alter.result mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.result --- mariadb-10.11.11/mysql-test/suite/sql_sequence/alter.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.result 2025-05-19 16:14:25.000000000 +0000 @@ -166,6 +166,32 @@ select next value for t1; next value for t1 11 +$check_innodb_flags; +is_sequence +12288 +alter table t1 sequence=0; +begin; +delete from t1; +rollback; +$check_innodb_flags; +is_sequence +0 +alter table t1 sequence=1; +$check_innodb_flags; +is_sequence +12288 +alter table t1 sequence=0, algorithm=copy; +$check_innodb_flags; +is_sequence +0 +alter table t1 sequence=1, algorithm=inplace; +ERROR 0A000: ALGORITHM=INPLACE is not supported. Reason: SEQUENCE. Try ALGORITHM=COPY +alter table t1 sequence=1, algorithm=copy; +$check_innodb_flags; +is_sequence +12288 +alter table t1 sequence=0, algorithm=inplace; +ERROR 0A000: ALGORITHM=INPLACE is not supported. Reason: SEQUENCE. 
Try ALGORITHM=COPY drop sequence t1; # # ALTER TABLE diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/alter.test mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.test --- mariadb-10.11.11/mysql-test/suite/sql_sequence/alter.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/alter.test 2025-05-19 16:14:25.000000000 +0000 @@ -80,6 +80,25 @@ show create sequence t1; select * from t1; select next value for t1; +let $check_innodb_flags = +select flag & 12288 is_sequence from information_schema.innodb_sys_tables +where name='test/t1'; +evalp $check_innodb_flags; +alter table t1 sequence=0; +begin; +delete from t1; +rollback; +evalp $check_innodb_flags; +alter table t1 sequence=1; +evalp $check_innodb_flags; +alter table t1 sequence=0, algorithm=copy; +evalp $check_innodb_flags; +--error ER_ALTER_OPERATION_NOT_SUPPORTED_REASON +alter table t1 sequence=1, algorithm=inplace; +alter table t1 sequence=1, algorithm=copy; +evalp $check_innodb_flags; +--error ER_ALTER_OPERATION_NOT_SUPPORTED_REASON +alter table t1 sequence=0, algorithm=inplace; drop sequence t1; --echo # diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/grant.result mariadb-10.11.13/mysql-test/suite/sql_sequence/grant.result --- mariadb-10.11.11/mysql-test/suite/sql_sequence/grant.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/grant.result 2025-05-19 16:14:25.000000000 +0000 @@ -47,14 +47,57 @@ 11 1 9223372036854775806 1 1 1000 0 0 connection only_alter; select next value for s1; -ERROR 42000: INSERT command denied to user 'only_alter'@'localhost' for table `mysqltest_1`.`s1` +ERROR 42000: SELECT, INSERT command denied to user 'only_alter'@'localhost' for table `mysqltest_1`.`s1` alter sequence s1 restart= 11; select * from s1; ERROR 42000: SELECT command denied to user 'only_alter'@'localhost' for table `mysqltest_1`.`s1` connection default; -drop database mysqltest_1; drop user 'normal'@'%'; drop user 'read_only'@'%'; drop user 'read_write'@'%'; drop user 'alter'@'%'; drop user 'only_alter'@'%'; +drop sequence s1; +# +# MDEV-36413 User without any privileges to a sequence can read from +# it and modify it via column default +# +create sequence s1; +create sequence s2; +select * from s2; +next_not_cached_value minimum_value maximum_value start_value increment cache_size cycle_option cycle_count +1 1 9223372036854775806 1 1 1000 0 0 +create table t2 (a int not null default(nextval(s1))); +insert into t2 values(); +create user u; +grant create, insert, select, drop on mysqltest_1.t1 to u; +grant insert, select on mysqltest_1.s1 to u; +grant select on mysqltest_1.t2 to u; +connect con1,localhost,u,,mysqltest_1; +select nextval(s2); +ERROR 42000: SELECT, INSERT command denied to user 'u'@'localhost' for table `mysqltest_1`.`s2` +show create sequence s2; +ERROR 42000: SHOW command denied to user 'u'@'localhost' for table `mysqltest_1`.`s2` +create table t1 (a int not null default(nextval(s1))); +drop table t1; +create table t1 (a int not null default(nextval(s1))) select a from t2; +insert into t1 values(); +select * from t1; +a +1 +2 +drop table t1; +create table t1 (a int not null default(nextval(s1))) select a from (select t2.a from t2,t2 as t3 where t2.a=t3.a) as t4; +drop table t1; +create table t1 (a int not null default(nextval(s2))); +ERROR 42000: SELECT, INSERT command denied to user 'u'@'localhost' for table `mysqltest_1`.`s2` +create table t1 (a int not null default(nextval(s1)), +b int not null default(nextval(s2))); +ERROR 
42000: SELECT, INSERT command denied to user 'u'@'localhost' for table `mysqltest_1`.`s2` +disconnect con1; +connection default; +drop user u; +drop database mysqltest_1; +# +# End of 10.11 tests +# diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/grant.test mariadb-10.11.13/mysql-test/suite/sql_sequence/grant.test --- mariadb-10.11.11/mysql-test/suite/sql_sequence/grant.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/grant.test 2025-05-19 16:14:25.000000000 +0000 @@ -60,10 +60,58 @@ # connection default; -drop database mysqltest_1; drop user 'normal'@'%'; drop user 'read_only'@'%'; drop user 'read_write'@'%'; drop user 'alter'@'%'; drop user 'only_alter'@'%'; +drop sequence s1; + +--echo # +--echo # MDEV-36413 User without any privileges to a sequence can read from +--echo # it and modify it via column default +--echo # + +create sequence s1; +create sequence s2; +select * from s2; +create table t2 (a int not null default(nextval(s1))); +insert into t2 values(); + +create user u; +grant create, insert, select, drop on mysqltest_1.t1 to u; +grant insert, select on mysqltest_1.s1 to u; +grant select on mysqltest_1.t2 to u; + +--connect(con1,localhost,u,,mysqltest_1) +--error ER_TABLEACCESS_DENIED_ERROR +select nextval(s2); +--error ER_TABLEACCESS_DENIED_ERROR +show create sequence s2; + +create table t1 (a int not null default(nextval(s1))); +drop table t1; +create table t1 (a int not null default(nextval(s1))) select a from t2; +insert into t1 values(); +select * from t1; +drop table t1; +create table t1 (a int not null default(nextval(s1))) select a from (select t2.a from t2,t2 as t3 where t2.a=t3.a) as t4; +drop table t1; +--error ER_TABLEACCESS_DENIED_ERROR +create table t1 (a int not null default(nextval(s2))); +--error ER_TABLEACCESS_DENIED_ERROR +create table t1 (a int not null default(nextval(s1)), + b int not null default(nextval(s2))); +--disconnect con1 +--connection default +drop user u; + +# +# Cleanup +# + +drop database mysqltest_1; +--echo # +--echo # End of 10.11 tests +--echo # diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/gtid.result mariadb-10.11.13/mysql-test/suite/sql_sequence/gtid.result --- mariadb-10.11.11/mysql-test/suite/sql_sequence/gtid.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/gtid.result 2025-05-19 16:14:25.000000000 +0000 @@ -174,7 +174,7 @@ drop sequence s_db.s2; connection m_normal_2; select next value for s_db.s1; -ERROR 42000: INSERT command denied to user 'normal_2'@'localhost' for table `s_db`.`s1` +ERROR 42000: SELECT, INSERT command denied to user 'normal_2'@'localhost' for table `s_db`.`s1` create sequence s_db.s2; ERROR 42000: CREATE command denied to user 'normal_2'@'localhost' for table `s_db`.`s2` connection m_normal_1; diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/other.result mariadb-10.11.13/mysql-test/suite/sql_sequence/other.result --- mariadb-10.11.11/mysql-test/suite/sql_sequence/other.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/other.result 2025-05-19 16:14:25.000000000 +0000 @@ -48,7 +48,6 @@ insert into s1 (next_not_cached_value, minimum_value) values (100,1000); ERROR HY000: Field 'maximum_value' doesn't have a default value insert into s1 values (next value for s1, 1,9223372036854775806,1,1,1000,0,0); -ERROR HY000: Table 's1' is specified twice, both as a target for 'INSERT' and as a separate source for data insert into s1 
values(1000,9223372036854775806,1,1,1,1000,0,0); ERROR HY000: Sequence 'test.s1' has out of range value for options insert into s1 values(0,9223372036854775806,1,1,1,1000,0,0); diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/other.test mariadb-10.11.13/mysql-test/suite/sql_sequence/other.test --- mariadb-10.11.11/mysql-test/suite/sql_sequence/other.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/other.test 2025-05-19 16:14:25.000000000 +0000 @@ -38,7 +38,6 @@ create sequence s2; --error ER_NO_DEFAULT_FOR_FIELD insert into s1 (next_not_cached_value, minimum_value) values (100,1000); ---error ER_UPDATE_TABLE_USED insert into s1 values (next value for s1, 1,9223372036854775806,1,1,1000,0,0); --error ER_SEQUENCE_INVALID_DATA insert into s1 values(1000,9223372036854775806,1,1,1,1000,0,0); diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/replication.result mariadb-10.11.13/mysql-test/suite/sql_sequence/replication.result --- mariadb-10.11.11/mysql-test/suite/sql_sequence/replication.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/replication.result 2025-05-19 16:14:25.000000000 +0000 @@ -285,7 +285,7 @@ drop sequence s_db.s2; connection m_normal_2; select NEXT VALUE for s_db.s1; -ERROR 42000: INSERT command denied to user 'normal_2'@'localhost' for table `s_db`.`s1` +ERROR 42000: SELECT, INSERT command denied to user 'normal_2'@'localhost' for table `s_db`.`s1` create sequence s_db.s2; ERROR 42000: CREATE command denied to user 'normal_2'@'localhost' for table `s_db`.`s2` connection m_normal_1; diff -Nru mariadb-10.11.11/mysql-test/suite/sql_sequence/view.test mariadb-10.11.13/mysql-test/suite/sql_sequence/view.test --- mariadb-10.11.11/mysql-test/suite/sql_sequence/view.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sql_sequence/view.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,5 +1,4 @@ --source include/have_sequence.inc ---source include/have_innodb.inc # # Test sequences with views diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result mariadb-10.11.13/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result --- mariadb-10.11.11/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result 2025-05-19 16:14:25.000000000 +0000 @@ -1,16 +1,17 @@ SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; -'#---------------------BS_STVARS_022_01----------------------#' -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) -1 -1 Expected '#---------------------BS_STVARS_022_02----------------------#' -SET @@GLOBAL.innodb_buffer_pool_size=10485760; -Expected succeeded -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) +SELECT @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size_max +8388608 +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max +1 +SET GLOBAL innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max + 1048576; +Warnings: +Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '9437184' +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max 1 -1 Expected 
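The rewritten result above captures the new behavior: innodb_buffer_pool_size is now capped by the read-only innodb_buffer_pool_size_max, and a SET beyond the cap is truncated with a warning instead of triggering an online resize. A minimal interactive sketch of the same exchange (the 8 MiB cap comes from the --innodb-buffer-pool-size-max=8m option added to the test's .opt file later in this diff):

  SELECT @@GLOBAL.innodb_buffer_pool_size_max;   -- 8388608 (8 MiB)
  SET GLOBAL innodb_buffer_pool_size = 9437184;  -- 1 MiB above the cap
  -- Warning 1292: Truncated incorrect innodb_buffer_pool_size value: '9437184'
  SELECT @@GLOBAL.innodb_buffer_pool_size;       -- 8388608, clamped to the cap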
'#---------------------BS_STVARS_022_03----------------------#' SELECT @@GLOBAL.innodb_buffer_pool_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -18,10 +19,6 @@ @@GLOBAL.innodb_buffer_pool_size = VARIABLE_VALUE 1 1 Expected -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) -1 -1 Expected SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_size'; @@ -50,4 +47,7 @@ 1 Expected SELECT innodb_buffer_pool_size = @@SESSION.innodb_buffer_pool_size; ERROR 42S22: Unknown column 'innodb_buffer_pool_size' in 'SELECT' -# restart +SET GLOBAL innodb_buffer_pool_size = @start_buffer_pool_size; +SELECT @@innodb_buffer_pool_size = @start_buffer_pool_size; +@@innodb_buffer_pool_size = @start_buffer_pool_size +1 diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff --- mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff 2025-05-19 16:14:25.000000000 +0000 @@ -9,7 +9,7 @@ VARIABLE_COMMENT Number of InnoDB Adaptive Hash Index Partitions (default 8) NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 512 -@@ -71,7 +71,7 @@ +@@ -83,7 +83,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -18,20 +18,20 @@ VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility); 1 => New style AUTOINC locking; 2 => No AUTOINC locking (unsafe for SBR) NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -83,10 +83,10 @@ +@@ -95,10 +95,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Size of a single memory chunk for resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means autosize this variable based on buffer pool size. + VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 0 -NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_MAX_VALUE 4294967295 NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -119,7 +119,7 @@ +@@ -131,7 +131,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 25 VARIABLE_SCOPE GLOBAL @@ -40,7 +40,50 @@ VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 25 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 100 -@@ -203,7 +203,7 @@ +@@ -203,10 +203,10 @@ + SESSION_VALUE NULL + DEFAULT_VALUE 134217728 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT The size of the memory buffer InnoDB uses to cache data and indexes of its tables. 
+ NUMERIC_MIN_VALUE 2097152 +-NUMERIC_MAX_VALUE 18446744073701163008 ++NUMERIC_MAX_VALUE 4292870144 + NUMERIC_BLOCK_SIZE 1048576 + ENUM_VALUE_LIST NULL + READ_ONLY NO +@@ -215,11 +215,11 @@ + SESSION_VALUE NULL + DEFAULT_VALUE 0 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Minimum innodb_buffer_pool_size for dynamic shrinking on memory pressure + NUMERIC_MIN_VALUE 0 +-NUMERIC_MAX_VALUE 18446744073701163008 +-NUMERIC_BLOCK_SIZE 8388608 ++NUMERIC_MAX_VALUE 4292870144 ++NUMERIC_BLOCK_SIZE 2097152 + ENUM_VALUE_LIST NULL + READ_ONLY NO + COMMAND_LINE_ARGUMENT REQUIRED +@@ -227,11 +227,11 @@ + SESSION_VALUE NULL + DEFAULT_VALUE 0 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT Maximum innodb_buffer_pool_size + NUMERIC_MIN_VALUE 0 +-NUMERIC_MAX_VALUE 18446744073701163008 +-NUMERIC_BLOCK_SIZE 8388608 ++NUMERIC_MAX_VALUE 4292870144 ++NUMERIC_BLOCK_SIZE 2097152 + ENUM_VALUE_LIST NULL + READ_ONLY YES + COMMAND_LINE_ARGUMENT REQUIRED +@@ -239,7 +239,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -49,7 +92,7 @@ VARIABLE_COMMENT A number between [0, 100] that tells how oftern buffer pool dump status in percentages should be printed. E.g. 10 means that buffer pool dump status is printed when every 10% of number of buffer pool pages are dumped. Default is 0 (only start and end status is printed). NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -323,7 +323,7 @@ +@@ -359,7 +359,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 5 VARIABLE_SCOPE GLOBAL @@ -58,7 +101,7 @@ VARIABLE_COMMENT If the compression failure rate of a table is greater than this number more padding is added to the pages to reduce the failures. A value of zero implies no padding NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -347,7 +347,7 @@ +@@ -383,7 +383,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 50 VARIABLE_SCOPE GLOBAL @@ -67,7 +110,7 @@ VARIABLE_COMMENT Percentage of empty space on a data page that can be reserved to make the page compressible. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 75 -@@ -623,7 +623,7 @@ +@@ -671,7 +671,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 600 VARIABLE_SCOPE GLOBAL @@ -76,7 +119,7 @@ VARIABLE_COMMENT Maximum number of seconds that semaphore times out in InnoDB. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 4294967295 -@@ -671,7 +671,7 @@ +@@ -719,7 +719,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -85,7 +128,7 @@ VARIABLE_COMMENT Number of iterations over which the background flushing is averaged. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1000 -@@ -695,7 +695,7 @@ +@@ -743,7 +743,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -94,7 +137,7 @@ VARIABLE_COMMENT Controls the durability/speed trade-off for commits. Set to 0 (write and flush redo log to disk only once per second), 1 (flush to disk at each commit), 2 (write to log at commit but flush to disk only once per second) or 3 (flush to disk at prepare and at commit, slower and usually redundant). 1 and 3 guarantees that after a crash, committed transactions will not be lost and will be consistent with the binlog and other transactional engines. 2 can get inconsistent and lose transactions if there is a power failure or kernel crash but not if mysqld crashes. 0 has no guarantees in case of crash. 0 and 2 can be faster than 1 or 3. 
NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 3 -@@ -719,7 +719,7 @@ +@@ -767,7 +767,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -103,7 +146,7 @@ VARIABLE_COMMENT Set to 0 (don't flush neighbors from buffer pool), 1 (flush contiguous neighbors from buffer pool) or 2 (flush neighbors from buffer pool), when flushing a block NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -755,7 +755,7 @@ +@@ -803,7 +803,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -112,7 +155,7 @@ VARIABLE_COMMENT Helps to save your data in case the disk image of the database becomes corrupt. Value 5 can return bogus data, and 6 can permanently corrupt data. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 6 -@@ -779,10 +779,10 @@ +@@ -827,10 +827,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 8000000 VARIABLE_SCOPE GLOBAL @@ -125,7 +168,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -815,7 +815,7 @@ +@@ -863,7 +863,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 84 VARIABLE_SCOPE GLOBAL @@ -134,7 +177,7 @@ VARIABLE_COMMENT InnoDB Fulltext search maximum token size in characters NUMERIC_MIN_VALUE 10 NUMERIC_MAX_VALUE 84 -@@ -827,7 +827,7 @@ +@@ -875,7 +875,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 3 VARIABLE_SCOPE GLOBAL @@ -143,7 +186,7 @@ VARIABLE_COMMENT InnoDB Fulltext search minimum token size in characters NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 16 -@@ -839,7 +839,7 @@ +@@ -887,7 +887,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000 VARIABLE_SCOPE GLOBAL @@ -152,7 +195,7 @@ VARIABLE_COMMENT InnoDB Fulltext search number of words to optimize for each optimize table call NUMERIC_MIN_VALUE 1000 NUMERIC_MAX_VALUE 10000 -@@ -851,10 +851,10 @@ +@@ -899,10 +899,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000000000 VARIABLE_SCOPE GLOBAL @@ -165,7 +208,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -875,7 +875,7 @@ +@@ -923,7 +923,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2 VARIABLE_SCOPE GLOBAL @@ -174,7 +217,7 @@ VARIABLE_COMMENT InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 16 -@@ -887,10 +887,10 @@ +@@ -935,10 +935,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 640000000 VARIABLE_SCOPE GLOBAL @@ -187,7 +230,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -935,22 +935,22 @@ +@@ -983,7 +983,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 200 VARIABLE_SCOPE GLOBAL @@ -195,27 +238,17 @@ +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Number of IOPs the server can do. Tunes the background IO rate NUMERIC_MIN_VALUE 100 --NUMERIC_MAX_VALUE 18446744073709551615 -+NUMERIC_MAX_VALUE 4294967295 - NUMERIC_BLOCK_SIZE 0 - ENUM_VALUE_LIST NULL - READ_ONLY NO - COMMAND_LINE_ARGUMENT REQUIRED - VARIABLE_NAME INNODB_IO_CAPACITY_MAX + NUMERIC_MAX_VALUE 4294967295 +@@ -995,7 +995,7 @@ SESSION_VALUE NULL --DEFAULT_VALUE 18446744073709551615 -+DEFAULT_VALUE 4294967295 + DEFAULT_VALUE 4294967295 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Limit to which innodb_io_capacity can be inflated. 
NUMERIC_MIN_VALUE 100 --NUMERIC_MAX_VALUE 18446744073709551615 -+NUMERIC_MAX_VALUE 4294967295 - NUMERIC_BLOCK_SIZE 0 - ENUM_VALUE_LIST NULL - READ_ONLY NO -@@ -1043,10 +1043,10 @@ + NUMERIC_MAX_VALUE 4294967295 +@@ -1115,10 +1115,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 32 VARIABLE_SCOPE GLOBAL @@ -228,7 +261,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1055,10 +1055,10 @@ +@@ -1127,10 +1127,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 1536 VARIABLE_SCOPE GLOBAL @@ -241,7 +274,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1091,10 +1091,10 @@ +@@ -1163,10 +1163,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -254,7 +287,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1103,7 +1103,7 @@ +@@ -1175,7 +1175,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -263,7 +296,7 @@ VARIABLE_COMMENT Maximum delay of user threads in micro-seconds NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 10000000 -@@ -1235,10 +1235,10 @@ +@@ -1307,10 +1307,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -276,7 +309,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1259,7 +1259,7 @@ +@@ -1331,7 +1331,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 16384 VARIABLE_SCOPE GLOBAL @@ -285,16 +318,16 @@ VARIABLE_COMMENT Page size to use for all InnoDB tablespaces. NUMERIC_MIN_VALUE 4096 NUMERIC_MAX_VALUE 65536 -@@ -1295,7 +1295,7 @@ +@@ -1367,7 +1367,7 @@ SESSION_VALUE NULL - DEFAULT_VALUE 1000 + DEFAULT_VALUE 127 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 5000 -@@ -1307,7 +1307,7 @@ +@@ -1379,7 +1379,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL @@ -303,7 +336,7 @@ VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 128 -@@ -1343,7 +1343,7 @@ +@@ -1415,7 +1415,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 56 VARIABLE_SCOPE GLOBAL @@ -312,7 +345,7 @@ VARIABLE_COMMENT Number of pages that must be accessed sequentially for InnoDB to trigger a readahead. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -@@ -1427,7 +1427,7 @@ +@@ -1499,7 +1499,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1048576 VARIABLE_SCOPE GLOBAL @@ -321,7 +354,7 @@ VARIABLE_COMMENT Memory buffer size for index creation NUMERIC_MIN_VALUE 65536 NUMERIC_MAX_VALUE 67108864 -@@ -1595,10 +1595,10 @@ +@@ -1667,10 +1667,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_innodb.result mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_innodb.result --- mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_innodb.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_innodb.result 2025-05-19 16:14:25.000000000 +0000 @@ -96,7 +96,7 @@ DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Size of a single memory chunk for resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means autosize this variable based on buffer pool size. +VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 18446744073709551615 NUMERIC_BLOCK_SIZE 1048576 @@ -206,11 +206,35 @@ VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT The size of the memory buffer InnoDB uses to cache data and indexes of its tables. 
NUMERIC_MIN_VALUE 2097152 -NUMERIC_MAX_VALUE 9223372036854775807 +NUMERIC_MAX_VALUE 18446744073701163008 NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BUFFER_POOL_SIZE_AUTO_MIN +SESSION_VALUE NULL +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Minimum innodb_buffer_pool_size for dynamic shrinking on memory pressure +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073701163008 +NUMERIC_BLOCK_SIZE 8388608 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BUFFER_POOL_SIZE_MAX +SESSION_VALUE NULL +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Maximum innodb_buffer_pool_size +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073701163008 +NUMERIC_BLOCK_SIZE 8388608 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUF_DUMP_STATUS_FREQUENCY SESSION_VALUE NULL DEFAULT_VALUE 0 @@ -962,19 +986,19 @@ VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT Number of IOPs the server can do. Tunes the background IO rate NUMERIC_MIN_VALUE 100 -NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_MAX_VALUE 4294967295 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_IO_CAPACITY_MAX SESSION_VALUE NULL -DEFAULT_VALUE 18446744073709551615 +DEFAULT_VALUE 4294967295 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT Limit to which innodb_io_capacity can be inflated. NUMERIC_MIN_VALUE 100 -NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_MAX_VALUE 4294967295 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO @@ -1020,7 +1044,7 @@ DEFAULT_VALUE OFF VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Force checkpoint now +VARIABLE_COMMENT Write back dirty pages from the buffer pool and update the log checkpoint NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL @@ -1068,7 +1092,7 @@ DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE INT UNSIGNED -VARIABLE_COMMENT Delay between log buffer spin lock polls (0 to use a blocking latch) +VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 6000 NUMERIC_BLOCK_SIZE 0 @@ -1571,10 +1595,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 20 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT The number of leaf index pages to sample when calculating persistent statistics (by ANALYZE, default 20) NUMERIC_MIN_VALUE 1 -NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_MAX_VALUE 4294967295 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO @@ -1595,10 +1619,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 8 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT The number of leaf index pages to sample when calculating transient statistics (if persistent statistics are not used, default 8) NUMERIC_MIN_VALUE 1 -NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_MAX_VALUE 4294967295 NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result --- mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result 2025-05-19 16:14:25.000000000 +0000 @@ -2325,11 +2325,11 @@ VARIABLE_NAME 
OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS VARIABLE_SCOPE SESSION VARIABLE_TYPE SET -VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. +VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier +ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result --- mariadb-10.11.11/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result 2025-05-19 16:14:25.000000000 +0000 @@ -2495,11 +2495,11 @@ VARIABLE_NAME OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS VARIABLE_SCOPE SESSION VARIABLE_TYPE SET -VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. 
+VARIABLE_COMMENT A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. fix_innodb_cardinality = Disable doubling of the Cardinality for InnoDB secondary keys. fix_reuse_range_for_ref = Do a better job at reusing range access estimates when estimating ref access. fix_card_multiplier = Fix the computation in selectivity_for_indexes. fix_derived_table_read_cost = Fix the cost of reading materialized derived table. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer. NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier +ENUM_VALUE_LIST adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by,fix_innodb_cardinality,fix_reuse_range_for_ref,fix_card_multiplier,fix_derived_table_read_cost READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME OPTIMIZER_EXTRA_PRUNING_DEPTH diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/r/wsrep_forced_binlog_format_basic.result mariadb-10.11.13/mysql-test/suite/sys_vars/r/wsrep_forced_binlog_format_basic.result --- mariadb-10.11.11/mysql-test/suite/sys_vars/r/wsrep_forced_binlog_format_basic.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/r/wsrep_forced_binlog_format_basic.result 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -# -# wsrep_forced_binlog_format -# -# save the initial value -SET @wsrep_forced_binlog_format_global_saved = @@global.wsrep_forced_binlog_format; -# default -SELECT @@global.wsrep_forced_binlog_format; -@@global.wsrep_forced_binlog_format -NONE - -# scope -SELECT @@session.wsrep_forced_binlog_format; -ERROR HY000: Variable 'wsrep_forced_binlog_format' is a GLOBAL variable -SET @@global.wsrep_forced_binlog_format=STATEMENT; -SELECT @@global.wsrep_forced_binlog_format; -@@global.wsrep_forced_binlog_format -STATEMENT - -# valid values -SET @@global.wsrep_forced_binlog_format=STATEMENT; -SELECT @@global.wsrep_forced_binlog_format; -@@global.wsrep_forced_binlog_format -STATEMENT -SET @@global.wsrep_forced_binlog_format=ROW; -SELECT @@global.wsrep_forced_binlog_format; -@@global.wsrep_forced_binlog_format -ROW -SET @@global.wsrep_forced_binlog_format=MIXED; -SELECT @@global.wsrep_forced_binlog_format; -@@global.wsrep_forced_binlog_format -MIXED -SET @@global.wsrep_forced_binlog_format=NONE; -SELECT @@global.wsrep_forced_binlog_format; -@@global.wsrep_forced_binlog_format -NONE -SET @@global.wsrep_forced_binlog_format=default; -SELECT @@global.wsrep_forced_binlog_format; -@@global.wsrep_forced_binlog_format -NONE - -# invalid values -SET @@global.wsrep_forced_binlog_format=NULL; -ERROR 42000: Variable 'wsrep_forced_binlog_format' can't be set to the value of 'NULL' -SET @@global.wsrep_forced_binlog_format='junk'; -ERROR 42000: Variable 'wsrep_forced_binlog_format' can't be set to the value of 'junk' -SET @@global.wsrep_forced_binlog_format=ON; -ERROR 42000: Variable 'wsrep_forced_binlog_format' can't be set to the value of 'ON' - -# restore the initial value -SET @@global.wsrep_forced_binlog_format = @wsrep_forced_binlog_format_global_saved; -# End of test diff -Nru 
mariadb-10.11.11/mysql-test/suite/sys_vars/r/wsrep_replicate_myisam_basic.result mariadb-10.11.13/mysql-test/suite/sys_vars/r/wsrep_replicate_myisam_basic.result --- mariadb-10.11.11/mysql-test/suite/sys_vars/r/wsrep_replicate_myisam_basic.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/r/wsrep_replicate_myisam_basic.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,15 @@ +# +# wsrep_replicate_myisam +# +# save the initial value +SET @wsrep_mode_saved = @@global.wsrep_mode; + +# scope and valid values +SET @@global.wsrep_mode=REPLICATE_MYISAM; +SELECT @@global.wsrep_mode; +@@global.wsrep_mode +REPLICATE_MYISAM + +# restore the initial value +SET @@global.wsrep_mode = @wsrep_mode_saved; +# End of test diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt --- mariadb-10.11.11/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ ---innodb-buffer-pool-chunk-size=2M diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt --- mariadb-10.11.11/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=8m diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test --- mariadb-10.11.11/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test 2025-05-19 16:14:25.000000000 +0000 @@ -24,35 +24,19 @@ --source include/have_innodb.inc -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; ---echo '#---------------------BS_STVARS_022_01----------------------#' -#################################################################### -# Displaying default value # -#################################################################### -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - - --echo '#---------------------BS_STVARS_022_02----------------------#' #################################################################### # Check if Value can set # #################################################################### -SET @@GLOBAL.innodb_buffer_pool_size=10485760; ---echo Expected succeeded ---source include/wait_condition.inc - -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - - - +--enable_warnings +SELECT @@GLOBAL.innodb_buffer_pool_size_max; +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +SET GLOBAL innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max + 1048576; +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +--disable_warnings --echo 
'#---------------------BS_STVARS_022_03----------------------#' ################################################################# @@ -66,9 +50,6 @@ --enable_warnings --echo 1 Expected -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - --disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -76,8 +57,6 @@ --enable_warnings --echo 1 Expected - - --echo '#---------------------BS_STVARS_022_04----------------------#' ################################################################################ # Check if accessing variable with and without GLOBAL point to same variable # @@ -111,4 +90,6 @@ # Restore the original buffer pool size. ---source include/restart_mysqld.inc +SET GLOBAL innodb_buffer_pool_size = @start_buffer_pool_size; + +SELECT @@innodb_buffer_pool_size = @start_buffer_pool_size; diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/t/sysvars_innodb.opt mariadb-10.11.13/mysql-test/suite/sys_vars/t/sysvars_innodb.opt --- mariadb-10.11.11/mysql-test/suite/sys_vars/t/sysvars_innodb.opt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/t/sysvars_innodb.opt 2025-05-19 16:14:25.000000000 +0000 @@ -1,2 +1,4 @@ ---loose-innodb-flush-log-at-timeout=3 +--innodb +--innodb-purge-rseg-truncate-frequency=64 +--innodb-flush-log-at-timeout=3 --table_open_cache=200 diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/t/sysvars_innodb.test mariadb-10.11.13/mysql-test/suite/sys_vars/t/sysvars_innodb.test --- mariadb-10.11.11/mysql-test/suite/sys_vars/t/sysvars_innodb.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/t/sysvars_innodb.test 2025-05-19 16:14:25.000000000 +0000 @@ -3,6 +3,10 @@ --source include/not_valgrind.inc --source include/word_size.inc +--disable_query_log +call mtr.add_suppression("'innodb-purge-rseg-truncate-frequency' was removed"); +--enable_query_log + --vertical_results --replace_regex /^\/\S+/PATH/ /\.\//PATH/ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYPE, VARIABLE_COMMENT, NUMERIC_MIN_VALUE, NUMERIC_MAX_VALUE, NUMERIC_BLOCK_SIZE, ENUM_VALUE_LIST, READ_ONLY, COMMAND_LINE_ARGUMENT from information_schema.system_variables diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/t/wsrep_forced_binlog_format_basic.test mariadb-10.11.13/mysql-test/suite/sys_vars/t/wsrep_forced_binlog_format_basic.test --- mariadb-10.11.11/mysql-test/suite/sys_vars/t/wsrep_forced_binlog_format_basic.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/t/wsrep_forced_binlog_format_basic.test 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ ---source include/have_wsrep.inc - ---echo # ---echo # wsrep_forced_binlog_format ---echo # - ---echo # save the initial value -SET @wsrep_forced_binlog_format_global_saved = @@global.wsrep_forced_binlog_format; - ---echo # default -SELECT @@global.wsrep_forced_binlog_format; - ---echo ---echo # scope ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT @@session.wsrep_forced_binlog_format; -SET @@global.wsrep_forced_binlog_format=STATEMENT; -SELECT @@global.wsrep_forced_binlog_format; - ---echo ---echo # valid values -SET @@global.wsrep_forced_binlog_format=STATEMENT; -SELECT @@global.wsrep_forced_binlog_format; -SET @@global.wsrep_forced_binlog_format=ROW; -SELECT @@global.wsrep_forced_binlog_format; -SET @@global.wsrep_forced_binlog_format=MIXED; -SELECT @@global.wsrep_forced_binlog_format; -SET @@global.wsrep_forced_binlog_format=NONE; -SELECT 
@@global.wsrep_forced_binlog_format; -SET @@global.wsrep_forced_binlog_format=default; -SELECT @@global.wsrep_forced_binlog_format; - ---echo ---echo # invalid values ---error ER_WRONG_VALUE_FOR_VAR -SET @@global.wsrep_forced_binlog_format=NULL; ---error ER_WRONG_VALUE_FOR_VAR -SET @@global.wsrep_forced_binlog_format='junk'; ---error ER_WRONG_VALUE_FOR_VAR -SET @@global.wsrep_forced_binlog_format=ON; - ---echo ---echo # restore the initial value -SET @@global.wsrep_forced_binlog_format = @wsrep_forced_binlog_format_global_saved; - ---echo # End of test diff -Nru mariadb-10.11.11/mysql-test/suite/sys_vars/t/wsrep_replicate_myisam_basic.test mariadb-10.11.13/mysql-test/suite/sys_vars/t/wsrep_replicate_myisam_basic.test --- mariadb-10.11.11/mysql-test/suite/sys_vars/t/wsrep_replicate_myisam_basic.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/sys_vars/t/wsrep_replicate_myisam_basic.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,19 @@ +--source include/have_wsrep.inc + +--echo # +--echo # wsrep_replicate_myisam +--echo # + +--echo # save the initial value +SET @wsrep_mode_saved = @@global.wsrep_mode; + +--echo +--echo # scope and valid values +SET @@global.wsrep_mode=REPLICATE_MYISAM; +SELECT @@global.wsrep_mode; + +--echo +--echo # restore the initial value +SET @@global.wsrep_mode = @wsrep_mode_saved; + +--echo # End of test diff -Nru mariadb-10.11.11/mysql-test/suite/versioning/r/partition.result mariadb-10.11.13/mysql-test/suite/versioning/r/partition.result --- mariadb-10.11.11/mysql-test/suite/versioning/r/partition.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/versioning/r/partition.result 2025-05-19 16:14:25.000000000 +0000 @@ -3445,6 +3445,20 @@ create table t (a int) with system versioning partition by system_time partitions 3; ERROR HY000: Maybe missing parameters: no rotation condition for multiple HISTORY partitions. 
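The error in the context line just above is the existing guard that multiple HISTORY partitions require a rotation condition. For contrast, a form the server accepts (a sketch; any INTERVAL or LIMIT rotation condition would do):

  create table t (a int) with system versioning
  partition by system_time interval 1 hour
  partitions 3;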
# +# MDEV-36115 InnoDB: assertion: node->pcur->rel_pos == BTR_PCUR_ON +# in row_update_for_mysql +# +create table t (a int key) engine=innodb +with system versioning +partition by key() partitions 3; +start transaction; +insert into t values (1),(2),(3),(4),(5),(6),(7),(8); +set timestamp=+1; +delete from t; +insert into t values (1),(2); +DELETE from t; +drop table t; +# # End of 10.5 tests # # @@ -3470,4 +3484,25 @@ # # End of 10.9 tests # +# +# MDEV-34775 Wrong reopen of already open routine due to auto-create in SP +# +create table t (a int) with system versioning +partition by system_time +interval 1 minute auto; +create function f() +returns int +begin +replace into t select * from t; +return 0; +end $ +set timestamp= @@timestamp + 61; +select f(); +f() +0 +drop table t; +drop function f; +# +# End of 10.11 tests +# set global innodb_stats_persistent= @save_persistent; diff -Nru mariadb-10.11.11/mysql-test/suite/versioning/t/partition.test mariadb-10.11.13/mysql-test/suite/versioning/t/partition.test --- mariadb-10.11.11/mysql-test/suite/versioning/t/partition.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/versioning/t/partition.test 2025-05-19 16:14:25.000000000 +0000 @@ -2676,6 +2676,22 @@ create table t (a int) with system versioning partition by system_time partitions 3; --echo # +--echo # MDEV-36115 InnoDB: assertion: node->pcur->rel_pos == BTR_PCUR_ON +--echo # in row_update_for_mysql +--echo # +create table t (a int key) engine=innodb +with system versioning +partition by key() partitions 3; + +start transaction; +insert into t values (1),(2),(3),(4),(5),(6),(7),(8); +set timestamp=+1; +delete from t; +insert into t values (1),(2); +DELETE from t; +drop table t; + +--echo # --echo # End of 10.5 tests --echo # @@ -2717,5 +2733,32 @@ --echo # End of 10.9 tests --echo # +--echo # +--echo # MDEV-34775 Wrong reopen of already open routine due to auto-create in SP +--echo # + +create table t (a int) with system versioning +partition by system_time +interval 1 minute auto; + +--delimiter $ +create function f() +returns int +begin + replace into t select * from t; + return 0; +end $ +--delimiter ; + +set timestamp= @@timestamp + 61; +select f(); + +drop table t; +drop function f; + +--echo # +--echo # End of 10.11 tests +--echo # + set global innodb_stats_persistent= @save_persistent; --source suite/versioning/common_finish.inc diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/README mariadb-10.11.13/mysql-test/suite/wsrep/README --- mariadb-10.11.11/mysql-test/suite/wsrep/README 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/README 2025-05-19 16:14:25.000000000 +0000 @@ -4,4 +4,3 @@ * As these tests are specific to wsrep-related functionalities, they must skip on server built without wsrep patch (vanilla). 
(-DWITH_WSREP=OFF) See : include/have_wsrep.inc, include/have_wsrep_enabled.inc, not_wsrep.inc - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/include/check_galera_version.inc mariadb-10.11.13/mysql-test/suite/wsrep/include/check_galera_version.inc --- mariadb-10.11.11/mysql-test/suite/wsrep/include/check_galera_version.inc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/include/check_galera_version.inc 2025-05-19 16:14:25.000000000 +0000 @@ -44,4 +44,3 @@ } --echo # Correct Galera library found - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/r/plugin.result mariadb-10.11.13/mysql-test/suite/wsrep/r/plugin.result --- mariadb-10.11.11/mysql-test/suite/wsrep/r/plugin.result 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/r/plugin.result 2025-05-19 16:14:25.000000000 +0000 @@ -1,3 +1,3 @@ -SELECT plugin_name,plugin_version,plugin_maturity FROM information_schema.plugins where plugin_name like 'wsrep' ORDER BY plugin_maturity,plugin_name; +SELECT plugin_name,plugin_version,plugin_maturity FROM information_schema.plugins WHERE plugin_name like 'wsrep' ORDER BY plugin_maturity,plugin_name; plugin_name plugin_version plugin_maturity wsrep 1.0 Stable diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep-recover-gtid-nobinlog.result mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-gtid-nobinlog.result --- mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep-recover-gtid-nobinlog.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-gtid-nobinlog.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,18 @@ +CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB; +# Case 1: Server goes through graceful shutdown and is restarted +connection default; +INSERT INTO t1 VALUES (1); +Expect 100-10-2 +SELECT WSREP_LAST_SEEN_GTID(); +WSREP_LAST_SEEN_GTID() +100-10-2 +Performing --wsrep-recover ... +Using --wsrep-start-position when starting mysqld ... +Expect 100-10-2 +SELECT WSREP_LAST_SEEN_GTID(); +WSREP_LAST_SEEN_GTID() +100-10-2 +SELECT * FROM t1; +f1 +1 +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep-recover-gtid.result mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-gtid.result --- mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep-recover-gtid.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-gtid.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,65 @@ +CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB; +# Case 1: Server goes through graceful shutdown and is restarted +connection default; +INSERT INTO t1 VALUES (1); +Performing --wsrep-recover ... +Using --wsrep-start-position when starting mysqld ... +Expect 100-10-2 +SELECT WSREP_LAST_SEEN_GTID(); +WSREP_LAST_SEEN_GTID() +100-10-2 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +100-10-2 +SELECT * FROM t1; +f1 +1 +# Case 2: Server is killed after the transaction gets prepared +# but before it is written into binlog. As there is no GTID assigned, +# the transaction must be rolled back during recovery. +connect con, localhost, root; +SET DEBUG_SYNC = "ha_commit_trans_after_prepare SIGNAL reached WAIT_FOR continue"; +INSERT INTO t1 VALUES (2); +connection default; +SET DEBUG_SYNC = "now WAIT_FOR reached"; +# Kill the server +Performing --wsrep-recover ... +Using --wsrep-start-position when starting mysqld ... 
+Expect 100-10-2 +SELECT WSREP_LAST_SEEN_GTID(); +WSREP_LAST_SEEN_GTID() +100-10-2 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +100-10-2 +Expect 1 +SELECT * FROM t1; +f1 +1 +disconnect con; +# Case 3: Server is killed after the transaction gets written into binlog +# but before it is committed in storage engine. In this case the +# transaction must be committed during recovery as it had a valid +# GTID assigned. +connect con, localhost, root; +SET DEBUG_SYNC = "commit_before_get_LOCK_commit_ordered SIGNAL reached WAIT_FOR continue"; +INSERT INTO t1 VALUES (3); +connection default; +SET DEBUG_SYNC = "now WAIT_FOR reached"; +# Kill the server +Performing --wsrep-recover ... +Using --wsrep-start-position when starting mysqld ... +Expect 100-10-3 +SELECT WSREP_LAST_SEEN_GTID(); +WSREP_LAST_SEEN_GTID() +100-10-3 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +100-10-3 +Expect 1 3 +SELECT * FROM t1; +f1 +1 +3 +disconnect con; +DROP TABLE t1; diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep-recover-v25,binlogon.rdiff mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-v25,binlogon.rdiff --- mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep-recover-v25,binlogon.rdiff 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep-recover-v25,binlogon.rdiff 2025-05-19 16:14:25.000000000 +0000 @@ -1,5 +1,5 @@ --- r/wsrep-recover-v25.result -+++ r/wsrep-recover-v25.reject ++++ r/wsrep-recover-v25,binlogoin.reject @@ -12,4 +12,16 @@ SELECT VARIABLE_VALUE `expect 6` FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_last_committed'; expect 6 diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep_forced_binlog_format.result mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep_forced_binlog_format.result --- mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep_forced_binlog_format.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep_forced_binlog_format.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,51 @@ +# +# wsrep_forced_binlog_format +# +# save the initial value +SET @wsrep_forced_binlog_format_global_saved = @@global.wsrep_forced_binlog_format; +# default +SELECT @@global.wsrep_forced_binlog_format; +@@global.wsrep_forced_binlog_format +NONE + +# scope +SELECT @@session.wsrep_forced_binlog_format; +ERROR HY000: Variable 'wsrep_forced_binlog_format' is a GLOBAL variable +SET @@global.wsrep_forced_binlog_format=STATEMENT; +SELECT @@global.wsrep_forced_binlog_format; +@@global.wsrep_forced_binlog_format +STATEMENT + +# valid values +SET @@global.wsrep_forced_binlog_format=STATEMENT; +SELECT @@global.wsrep_forced_binlog_format; +@@global.wsrep_forced_binlog_format +STATEMENT +SET @@global.wsrep_forced_binlog_format=ROW; +SELECT @@global.wsrep_forced_binlog_format; +@@global.wsrep_forced_binlog_format +ROW +SET @@global.wsrep_forced_binlog_format=MIXED; +SELECT @@global.wsrep_forced_binlog_format; +@@global.wsrep_forced_binlog_format +MIXED +SET @@global.wsrep_forced_binlog_format=NONE; +SELECT @@global.wsrep_forced_binlog_format; +@@global.wsrep_forced_binlog_format +NONE +SET @@global.wsrep_forced_binlog_format=default; +SELECT @@global.wsrep_forced_binlog_format; +@@global.wsrep_forced_binlog_format +NONE + +# invalid values +SET @@global.wsrep_forced_binlog_format=NULL; +ERROR 42000: Variable 'wsrep_forced_binlog_format' can't be set to the value of 'NULL' +SET @@global.wsrep_forced_binlog_format='junk'; +ERROR 42000: Variable 'wsrep_forced_binlog_format' can't be set to the value 
of 'junk' +SET @@global.wsrep_forced_binlog_format=ON; +ERROR 42000: Variable 'wsrep_forced_binlog_format' can't be set to the value of 'ON' + +# restore the initial value +SET @@global.wsrep_forced_binlog_format = @wsrep_forced_binlog_format_global_saved; +# End of test diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep_mixed_case_cmd_arg.result mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep_mixed_case_cmd_arg.result --- mariadb-10.11.11/mysql-test/suite/wsrep/r/wsrep_mixed_case_cmd_arg.result 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/r/wsrep_mixed_case_cmd_arg.result 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,8 @@ +# +# MDEV-27126: my_getopt compares option names case sensitively +# +# Check if the variable is set correctly from options +SELECT @@GLOBAL.wsrep_slave_uk_checks; +@@GLOBAL.wsrep_slave_uk_checks +1 +# End of test. diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/suite.pm mariadb-10.11.13/mysql-test/suite/wsrep/suite.pm --- mariadb-10.11.11/mysql-test/suite/wsrep/suite.pm 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/suite.pm 2025-05-19 16:14:25.000000000 +0000 @@ -9,9 +9,9 @@ push @::global_suppressions, ( - qr(WSREP: Could not open saved state file for reading: .*), - qr(WSREP: Could not open state file for reading: .*), - qr|WSREP: access file\(.*gvwstate.dat\) failed\(No such file or directory\)|, + qr(WSREP: Could not open saved state file for reading: ), + qr(WSREP: Could not open state file for reading: ), + qr|WSREP: access file\(.*gvwstate.dat\) failed ?\(No such file or directory\)|, ); bless { }; diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/binlog_format.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/binlog_format.cnf --- mariadb-10.11.11/mysql-test/suite/wsrep/t/binlog_format.cnf 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/binlog_format.cnf 2025-05-19 16:14:25.000000000 +0000 @@ -5,4 +5,3 @@ wsrep-provider=@ENV.WSREP_PROVIDER wsrep-cluster-address=gcomm:// innodb_autoinc_lock_mode=2 - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/foreign_key.test mariadb-10.11.13/mysql-test/suite/wsrep/t/foreign_key.test --- mariadb-10.11.11/mysql-test/suite/wsrep/t/foreign_key.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/foreign_key.test 2025-05-19 16:14:25.000000000 +0000 @@ -17,4 +17,3 @@ # Cleanup DROP TABLE c; DROP TABLE p; - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/mdev_10186.test mariadb-10.11.13/mysql-test/suite/wsrep/t/mdev_10186.test --- mariadb-10.11.11/mysql-test/suite/wsrep/t/mdev_10186.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/mdev_10186.test 2025-05-19 16:14:25.000000000 +0000 @@ -9,4 +9,3 @@ SELECT @@wsrep_on; SET @@GLOBAL.wsrep_cluster_address='gcomm://'; - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/mdev_7798.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/mdev_7798.cnf --- mariadb-10.11.11/mysql-test/suite/wsrep/t/mdev_7798.cnf 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/mdev_7798.cnf 2025-05-19 16:14:25.000000000 +0000 @@ -4,4 +4,3 @@ wsrep-on=ON wsrep-provider=@ENV.WSREP_PROVIDER wsrep-cluster-address=gcomm:// - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/plugin.test mariadb-10.11.13/mysql-test/suite/wsrep/t/plugin.test --- mariadb-10.11.11/mysql-test/suite/wsrep/t/plugin.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/plugin.test 2025-05-19 
16:14:25.000000000 +0000 @@ -5,4 +5,4 @@ # MDEV-7604: wsrep plugin lists its status as Unknown # -SELECT plugin_name,plugin_version,plugin_maturity FROM information_schema.plugins where plugin_name like 'wsrep' ORDER BY plugin_maturity,plugin_name; \ No newline at end of file +SELECT plugin_name,plugin_version,plugin_maturity FROM information_schema.plugins WHERE plugin_name like 'wsrep' ORDER BY plugin_maturity,plugin_name; diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/pool_of_threads.test mariadb-10.11.13/mysql-test/suite/wsrep/t/pool_of_threads.test --- mariadb-10.11.11/mysql-test/suite/wsrep/t/pool_of_threads.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/pool_of_threads.test 2025-05-19 16:14:25.000000000 +0000 @@ -1,3 +1,4 @@ +--source include/have_innodb.inc --source include/have_wsrep_enabled.inc --source include/have_binlog_format_row.inc diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/variables.test mariadb-10.11.13/mysql-test/suite/wsrep/t/variables.test --- mariadb-10.11.11/mysql-test/suite/wsrep/t/variables.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/variables.test 2025-05-19 16:14:25.000000000 +0000 @@ -23,4 +23,3 @@ --echo # variables SELECT VARIABLE_NAME FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE VARIABLE_NAME LIKE "wsrep%" ORDER BY VARIABLE_NAME; - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/variables_debug.test mariadb-10.11.13/mysql-test/suite/wsrep/t/variables_debug.test --- mariadb-10.11.11/mysql-test/suite/wsrep/t/variables_debug.test 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/variables_debug.test 2025-05-19 16:14:25.000000000 +0000 @@ -8,7 +8,7 @@ --let $galera_version=26.4.21 source include/check_galera_version.inc; -source include/galera_variables_ok.inc; +source include/galera_variables_ok_debug.inc; --replace_column 2 # SHOW GLOBAL STATUS LIKE 'wsrep%'; @@ -25,4 +25,3 @@ --echo # variables SELECT VARIABLE_NAME FROM INFORMATION_SCHEMA.SESSION_VARIABLES WHERE VARIABLE_NAME LIKE "wsrep%" ORDER BY VARIABLE_NAME; - diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.cnf --- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.cnf 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.cnf 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,10 @@ +!include ../my.cnf + +[mysqld.1] +wsrep-on=ON +wsrep-provider=@ENV.WSREP_PROVIDER +wsrep-cluster-address=gcomm:// +binlog-format=ROW +wsrep-gtid-domain-id=100 +server-id=10 +innodb-autoinc-lock-mode=2 diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.test mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.test --- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.test 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid-nobinlog.test 2025-05-19 16:14:25.000000000 +0000 @@ -0,0 +1,28 @@ +# Test wsrep GTID recovery with binlog off. The test restarts the server +# and verifies that the GTID returned by SELECT WSREP_LAST_SEEN_GTID() +# gets initialized properly during server restart. 
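A note on the values the new wsrep recovery tests expect: MariaDB GTIDs are domain-server-seqno triples, so with the wsrep-gtid-domain-id=100 and server-id=10 settings in the accompanying .cnf files, the expected output decomposes as:

  SELECT WSREP_LAST_SEEN_GTID();  -- e.g. 100-10-2: domain 100, server 10, seqno 2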
+#
+--source include/have_wsrep.inc
+--source include/have_wsrep_provider.inc
+--source include/have_innodb.inc
+--source include/have_debug_sync.inc
+
+CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB;
+
+--echo # Case 1: Server goes through graceful shutdown and is restarted
+--connection default
+INSERT INTO t1 VALUES (1);
+
+--echo Expect 100-10-2
+SELECT WSREP_LAST_SEEN_GTID();
+
+--source include/shutdown_mysqld.inc
+--let $galera_wsrep_recover_server_id = 1
+--source suite/galera/include/galera_wsrep_recover.inc
+--source suite/galera/include/start_mysqld.inc
+
+--echo Expect 100-10-2
+SELECT WSREP_LAST_SEEN_GTID();
+SELECT * FROM t1;
+
+DROP TABLE t1;
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid.cnf
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid.cnf 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid.cnf 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1,14 @@
+!include ../my.cnf
+
+[mysqld.1]
+wsrep-on=ON
+wsrep-provider=@ENV.WSREP_PROVIDER
+wsrep-cluster-address=gcomm://
+binlog-format=ROW
+log-bin
+log-slave-updates
+gtid-domain-id=10
+gtid-strict-mode=ON
+wsrep-gtid-mode=ON
+wsrep-gtid-domain-id=100
+server-id=10
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid.test mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid.test
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover-gtid.test 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover-gtid.test 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1,73 @@
+# Test wsrep recovery with gtid_mode=ON. The test crashes the server
+# in different commit stages and verifies that the GTID returned by
+# SELECT WSREP_LAST_SEEN_GTID() and @@GLOBAL.gtid_binlog_pos get
+# initialized properly during server restart.
+#
+--source include/have_wsrep.inc
+--source include/have_wsrep_provider.inc
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+--source include/have_debug_sync.inc
+
+CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB;
+
+--echo # Case 1: Server goes through graceful shutdown and is restarted
+--connection default
+INSERT INTO t1 VALUES (1);
+--source include/shutdown_mysqld.inc
+--let $galera_wsrep_recover_server_id = 1
+--source suite/galera/include/galera_wsrep_recover.inc
+--source suite/galera/include/start_mysqld.inc
+
+--echo Expect 100-10-2
+SELECT WSREP_LAST_SEEN_GTID();
+SELECT @@GLOBAL.gtid_binlog_pos;
+SELECT * FROM t1;
+
+--echo # Case 2: Server is killed after the transaction gets prepared
+--echo # but before it is written into binlog. As there is not GTID assigned,
+--echo # the transaction must be rolled back during recovery.
+--connect con, localhost, root
+SET DEBUG_SYNC = "ha_commit_trans_after_prepare SIGNAL reached WAIT_FOR continue";
+--send INSERT INTO t1 VALUES (2)
+
+--connection default
+SET DEBUG_SYNC = "now WAIT_FOR reached";
+--source include/kill_mysqld.inc
+--let $galera_wsrep_recover_server_id = 1
+--source suite/galera/include/galera_wsrep_recover.inc
+--source suite/galera/include/start_mysqld.inc
+--source include/wait_wsrep_ready.inc
+
+--echo Expect 100-10-2
+SELECT WSREP_LAST_SEEN_GTID();
+SELECT @@GLOBAL.gtid_binlog_pos;
+--echo Expect 1
+SELECT * FROM t1;
+--disconnect con
+
+--echo # Case 3: Server is killed after the transaction gets written into binlog
+--echo # but before it is committed in storage engine. In this case the
+--echo # transaction must be committed during recovery as it had a valid
+--echo # GTID assigned.
+
+--connect con, localhost, root
+SET DEBUG_SYNC = "commit_before_get_LOCK_commit_ordered SIGNAL reached WAIT_FOR continue";
+--send INSERT INTO t1 VALUES (3)
+
+--connection default
+SET DEBUG_SYNC = "now WAIT_FOR reached";
+--source include/kill_mysqld.inc
+--let $galera_wsrep_recover_server_id = 1
+--source suite/galera/include/galera_wsrep_recover.inc
+--source suite/galera/include/start_mysqld.inc
+--source include/wait_wsrep_ready.inc
+--echo Expect 100-10-3
+SELECT WSREP_LAST_SEEN_GTID();
+SELECT @@GLOBAL.gtid_binlog_pos;
+--echo Expect 1 3
+SELECT * FROM t1;
+
+--disconnect con
+
+DROP TABLE t1;
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover.cnf
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep-recover.cnf 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep-recover.cnf 2025-05-19 16:14:25.000000000 +0000
@@ -6,4 +6,4 @@
 innodb-flush-log-at-trx-commit=1
 wsrep-cluster-address=gcomm://
 wsrep-provider=@ENV.WSREP_PROVIDER
-innodb-autoinc-lock-mode=2
\ No newline at end of file
+innodb-autoinc-lock-mode=2
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.cnf
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.cnf 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.cnf 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1,7 @@
+!include ../my.cnf
+
+[mysqld.1]
+wsrep-on=ON
+wsrep-cluster-address=gcomm://
+wsrep-provider=@ENV.WSREP_PROVIDER
+binlog-format=ROW
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.test mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.test
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.test 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_forced_binlog_format.test 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1,48 @@
+--source include/have_innodb.inc
+--source include/have_wsrep_provider.inc
+--source include/have_binlog_format_row.inc
+
+--echo #
+--echo # wsrep_forced_binlog_format
+--echo #
+
+--echo # save the initial value
+SET @wsrep_forced_binlog_format_global_saved = @@global.wsrep_forced_binlog_format;
+
+--echo # default
+SELECT @@global.wsrep_forced_binlog_format;
+
+--echo
+--echo # scope
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SELECT @@session.wsrep_forced_binlog_format;
+SET @@global.wsrep_forced_binlog_format=STATEMENT;
+SELECT @@global.wsrep_forced_binlog_format;
+
+--echo
+--echo # valid values
+SET @@global.wsrep_forced_binlog_format=STATEMENT;
+SELECT @@global.wsrep_forced_binlog_format;
+SET @@global.wsrep_forced_binlog_format=ROW;
+SELECT @@global.wsrep_forced_binlog_format;
+SET @@global.wsrep_forced_binlog_format=MIXED;
+SELECT @@global.wsrep_forced_binlog_format;
+SET @@global.wsrep_forced_binlog_format=NONE;
+SELECT @@global.wsrep_forced_binlog_format;
+SET @@global.wsrep_forced_binlog_format=default;
+SELECT @@global.wsrep_forced_binlog_format;
+
+--echo
+--echo # invalid values
+--error ER_WRONG_VALUE_FOR_VAR
+SET @@global.wsrep_forced_binlog_format=NULL;
+--error ER_WRONG_VALUE_FOR_VAR
+SET @@global.wsrep_forced_binlog_format='junk';
+--error ER_WRONG_VALUE_FOR_VAR
+SET @@global.wsrep_forced_binlog_format=ON;
+
+--echo
+--echo # restore the initial value
+SET @@global.wsrep_forced_binlog_format = @wsrep_forced_binlog_format_global_saved;
+
+--echo # End of test
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.cnf
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.cnf 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.cnf 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1,6 @@
+!include ../my.cnf
+
+[mysqld.1]
+wsrep-on=ON
+wsrep-provider=@ENV.WSREP_PROVIDER
+wsrep-cluster-address=gcomm://
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.opt mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.opt
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.opt 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.opt 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1 @@
+--wsrep-slave-uk-checks=1
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.test mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.test
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.test 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_mixed_case_cmd_arg.test 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1,11 @@
+--source include/have_innodb.inc
+--source include/have_wsrep_provider.inc
+--source include/have_binlog_format_row.inc
+--echo #
+--echo # MDEV-27126: my_getopt compares option names case sensitively
+--echo #
+
+--echo # Check if the variable is set correctly from options
+SELECT @@GLOBAL.wsrep_slave_uk_checks;
+
+--echo # End of test.
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_rpl.test mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_rpl.test
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_rpl.test 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_rpl.test 2025-05-19 16:14:25.000000000 +0000
@@ -41,4 +41,3 @@
 
 --source include/rpl_end.inc
 --echo # End of test.
-
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_variables_sst_method.test mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_variables_sst_method.test
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_variables_sst_method.test 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_variables_sst_method.test 2025-05-19 16:14:25.000000000 +0000
@@ -44,7 +44,6 @@
 SELECT @@global.wsrep_sst_method;
 SHOW WARNINGS;
 
-
 --disable_query_log
 SET @@global.wsrep_sst_method = @wsrep_sst_method_saved;
 --enable_query_log
diff -Nru mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_variables_wsrep_off.cnf mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_variables_wsrep_off.cnf
--- mariadb-10.11.11/mysql-test/suite/wsrep/t/wsrep_variables_wsrep_off.cnf 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysql-test/suite/wsrep/t/wsrep_variables_wsrep_off.cnf 2025-05-19 16:14:25.000000000 +0000
@@ -9,4 +9,3 @@
 #galera_port=@OPT.port
 #ist_port=@OPT.port
 #sst_port=@OPT.port
-
diff -Nru mariadb-10.11.11/mysys/CMakeLists.txt mariadb-10.11.13/mysys/CMakeLists.txt
--- mariadb-10.11.11/mysys/CMakeLists.txt 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysys/CMakeLists.txt 2025-05-19 16:14:25.000000000 +0000
@@ -46,7 +46,8 @@
             my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c ../sql-common/my_time.c
             my_rdtsc.c psi_noop.c my_atomic_writes.c my_cpu.c my_likely.c my_largepage.c
-            file_logger.c my_dlerror.c crc32/crc32c.cc)
+            file_logger.c my_dlerror.c crc32/crc32c.cc
+            my_virtual_mem.c)
 
 IF (WIN32)
   SET (MYSYS_SOURCES ${MYSYS_SOURCES}
@@ -170,7 +171,7 @@
 ENDIF(HAVE_BFD_H)
 
 IF (WIN32)
-  TARGET_LINK_LIBRARIES(mysys iphlpapi dbghelp)
+  TARGET_LINK_LIBRARIES(mysys iphlpapi dbghelp ws2_32 synchronization)
 ENDIF(WIN32)
 
 # Need explicit pthread for gcc -fsanitize=address
diff -Nru mariadb-10.11.11/mysys/mf_keycache.c mariadb-10.11.13/mysys/mf_keycache.c
--- mariadb-10.11.11/mysys/mf_keycache.c 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysys/mf_keycache.c 2025-05-19 16:14:25.000000000 +0000
@@ -3762,10 +3762,11 @@
 
 static int cmp_sec_link(const void *_a, const void *_b)
 {
-  BLOCK_LINK *const *a= _a;
-  BLOCK_LINK *const *b= _b;
-  return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
-          ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
+  const BLOCK_LINK *a= *(const BLOCK_LINK **)_a;
+  const BLOCK_LINK *b= *(const BLOCK_LINK **)_b;
+
+  return (a->hash_link->diskpos < b->hash_link->diskpos) ? -1 :
+         (a->hash_link->diskpos > b->hash_link->diskpos) ? 1 : 0;
 }
 
diff -Nru mariadb-10.11.11/mysys/my_default.c mariadb-10.11.13/mysys/my_default.c
--- mariadb-10.11.11/mysys/my_default.c 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysys/my_default.c 2025-05-19 16:14:25.000000000 +0000
@@ -318,6 +318,9 @@
   }
 
   if (! my_defaults_group_suffix)
+    my_defaults_group_suffix= getenv("MARIADB_GROUP_SUFFIX");
+
+  if (! my_defaults_group_suffix)
     my_defaults_group_suffix= getenv("MYSQL_GROUP_SUFFIX");
 
   if (my_defaults_extra_file && my_defaults_extra_file != extra_file_buffer)
diff -Nru mariadb-10.11.11/mysys/my_getopt.c mariadb-10.11.13/mysys/my_getopt.c
--- mariadb-10.11.11/mysys/my_getopt.c 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysys/my_getopt.c 2025-05-19 16:14:25.000000000 +0000
@@ -18,6 +18,7 @@
 #include <my_global.h>
 #include <m_string.h>
 #include <stdlib.h>
+#include <ctype.h>
 #include <my_sys.h>
 #include <mysys_err.h>
 #include <my_getopt.h>
@@ -1002,7 +1003,7 @@
   for (;s != end ; s++, t++)
   {
-    if ((*s != '-' ? *s : '_') != (*t != '-' ? *t : '_'))
+    if ((*s != '-' ? tolower(*s) : '_') != (*t != '-' ? tolower(*t) : '_'))
       DBUG_RETURN(1);
   }
   DBUG_RETURN(0);
 }
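
The MDEV-27126 hunk above makes getopt_compare_strings() match option names
case-insensitively while still treating '-' and '_' as interchangeable, which
is why the new wsrep_mixed_case_cmd_arg test can pass --wsrep-slave-uk-checks=1
on the command line. A minimal standalone sketch of that comparison rule
(the function name and return convention are illustrative, not part of the
patch):

    #include <ctype.h>

    /* Return 0 when two option names are equal, treating '-' and '_' as
       the same character and ignoring ASCII case, so that
       "wsrep-slave-UK-checks" matches "wsrep_slave_uk_checks". */
    static int opt_name_cmp(const char *s, const char *t)
    {
      for (; *s && *t; s++, t++)
      {
        int cs= (*s == '-') ? '_' : tolower((unsigned char) *s);
        int ct= (*t == '-') ? '_' : tolower((unsigned char) *t);
        if (cs != ct)
          return 1;                     /* mismatch */
      }
      return *s != *t;                  /* both names must end together */
    }
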
diff -Nru mariadb-10.11.11/mysys/my_largepage.c mariadb-10.11.13/mysys/my_largepage.c
--- mariadb-10.11.11/mysys/my_largepage.c 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysys/my_largepage.c 2025-05-19 16:14:25.000000000 +0000
@@ -35,17 +35,11 @@
 #endif /* __sun__ ... */
 #endif /* HAVE_SOLARIS_LARGE_PAGES */
 
-#if defined(_WIN32)
-static size_t my_large_page_size;
-#define HAVE_LARGE_PAGES
-#elif defined(HAVE_MMAP)
-#define HAVE_LARGE_PAGES
-#endif
-#ifdef HAVE_LARGE_PAGES
-static my_bool my_use_large_pages= 0;
-#else
-#define my_use_large_pages 0
+my_bool my_use_large_pages;
+
+#ifdef _WIN32
+static size_t my_large_page_size;
 #endif
 
 #if defined(HAVE_GETPAGESIZES) || defined(__linux__)
@@ -172,7 +166,7 @@
   @retval a large page size that is valid on this system or 0 if no large
   page size possible.
 */
-#if defined(HAVE_MMAP) && !defined(_WIN32)
+#ifndef _WIN32
 static size_t my_next_large_page_size(size_t sz, int *start)
 {
   DBUG_ENTER("my_next_large_page_size");
@@ -188,11 +182,12 @@
   }
   DBUG_RETURN(0);
 }
-#endif /* defined(MMAP) || !defined(_WIN32) */
+#endif
 
-int my_init_large_pages(my_bool super_large_pages)
+int my_init_large_pages(void)
 {
+  my_use_large_pages= 1;
 #ifdef _WIN32
   if (!my_obtain_privilege(SE_LOCK_MEMORY_NAME))
   {
@@ -200,19 +195,15 @@
       "Lock Pages in memory access rights required for use with"
       " large-pages, see https://mariadb.com/kb/en/library/"
      "mariadb-memory-allocation/#huge-pages", MYF(MY_WME));
+    my_use_large_pages= 0;
   }
   my_large_page_size= GetLargePageMinimum();
 #endif
 
-  my_use_large_pages= 1;
   my_get_large_page_sizes(my_large_page_sizes);
 
-#ifndef HAVE_LARGE_PAGES
-  my_printf_error(EE_OUTOFMEMORY, "No large page support on this platform",
-                  MYF(MY_WME));
-#endif
-
 #ifdef HAVE_SOLARIS_LARGE_PAGES
+  extern my_bool opt_super_large_pages;
   /*
     tell the kernel that we want to use 4/256MB page for heap storage
     and also for the stack. We use 4 MByte as default and if the
@@ -222,9 +213,15 @@
     measured in a number of GBytes. We use as big pages as possible
     which isn't bigger than the above desired page sizes.
+
+    Note: This refers to some implementations of the SPARC ISA,
+    where the supported page sizes are
+    8KiB, 64KiB, 512KiB, 4MiB, 32MiB, 256MiB, 2GiB, and 16GiB.
+    On implementations of the AMD64 ISA, the available page sizes
+    should be 4KiB, 2MiB, and 1GiB.
   */
   int nelem= 0;
-  size_t max_desired_page_size= (super_large_pages ? 256 : 4) * 1024 * 1024;
+  size_t max_desired_page_size= opt_super_large_pages ? 256 << 20 : 4 << 20;
   size_t max_page_size= my_next_large_page_size(max_desired_page_size, &nelem);
 
   if (max_page_size > 0)
@@ -426,6 +423,78 @@
   DBUG_RETURN(ptr);
 }
 
+#ifndef _WIN32
+/**
+  Special large pages allocator, with possibility to commit to allocating
+  more memory later.
+  Every implementation returns a zero filled buffer here.
+*/
+char *my_large_virtual_alloc(size_t *size)
+{
+  char *ptr;
+  DBUG_ENTER("my_large_virtual_alloc");
+
+  if (my_use_large_pages)
+  {
+    size_t large_page_size;
+    int page_i= 0;
+
+    while ((large_page_size= my_next_large_page_size(*size, &page_i)) != 0)
+    {
+      int mapflag= MAP_PRIVATE |
+# ifdef MAP_POPULATE
+        MAP_POPULATE |
+# endif
+# if defined MAP_HUGETLB /* linux 2.6.32 */
+        MAP_HUGETLB |
+#  if defined MAP_HUGE_SHIFT /* Linux-3.8+ */
+        my_bit_log2_size_t(large_page_size) << MAP_HUGE_SHIFT |
+#  else
+#   warning "No explicit large page (HUGETLB pages) support in Linux < 3.8"
+#  endif
+# elif defined MAP_ALIGNED
+        MAP_ALIGNED(my_bit_log2_size_t(large_page_size)) |
+#  if defined MAP_ALIGNED_SUPER
+        MAP_ALIGNED_SUPER |
+#  endif
+# endif
+        OS_MAP_ANON;
+
+      size_t aligned_size= MY_ALIGN(*size, (size_t) large_page_size);
+      ptr= mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, mapflag, -1, 0);
+      if (ptr == (void*) -1)
+      {
+        ptr= NULL;
+        /* try next smaller memory size */
+        if (errno == ENOMEM)
+          continue;
+
+        /* other errors are more serious */
+        break;
+      }
+      else /* success */
+      {
+        /*
+          we do need to record the adjustment so that munmap gets called with
+          the right size. This is only the case for HUGETLB pages.
+        */
+        *size= aligned_size;
+        DBUG_RETURN(ptr);
+      }
+    }
+  }
+
+  ptr= mmap(NULL, *size, PROT_READ | PROT_WRITE,
+            MAP_PRIVATE | OS_MAP_ANON, -1, 0);
+  if (ptr == MAP_FAILED)
+  {
+    my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size);
+    ptr= NULL;
+  }
+
+  DBUG_RETURN(ptr);
+}
+#endif
 
 /**
   General large pages deallocator.
@@ -482,7 +551,7 @@
 #endif /* memory_sanitizer */
 #else
   my_free_lock(ptr);
-#endif /* HAVE_MMMAP */
+#endif /* HAVE_MMAP */
   DBUG_VOID_RETURN;
 }
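
my_large_virtual_alloc() above walks the available large-page sizes and, when
an explicit-hugepage mmap() fails with ENOMEM, retries with the next smaller
size before settling for ordinary pages. A compressed sketch of that fallback
pattern (Linux-flavoured; the single hard-coded 2 MiB size and the helper name
are assumptions for illustration only):

    #include <stddef.h>
    #include <sys/mman.h>

    /* Try one explicit-hugepage mapping, then fall back to normal pages,
       mirroring the retry loop in my_large_virtual_alloc(). */
    static void *alloc_with_hugepage_fallback(size_t size)
    {
      void *p= MAP_FAILED;
    #ifdef MAP_HUGETLB
      size_t huge= 2u * 1024 * 1024;             /* assumed huge page size */
      size_t aligned= (size + huge - 1) & ~(huge - 1);
      p= mmap(NULL, aligned, PROT_READ | PROT_WRITE,
              MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    #endif
      if (p == MAP_FAILED)   /* e.g. ENOMEM: no free huge pages configured */
        p= mmap(NULL, size, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      return p == MAP_FAILED ? NULL : p;
    }
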
diff -Nru mariadb-10.11.11/mysys/my_pread.c mariadb-10.11.13/mysys/my_pread.c
--- mariadb-10.11.11/mysys/my_pread.c 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/mysys/my_pread.c 2025-05-19 16:14:25.000000000 +0000
@@ -158,6 +158,15 @@
 #else
     writtenbytes= pwrite(Filedes, Buffer, Count, offset);
 #endif
+
+    DBUG_EXECUTE_IF ("simulate_file_pwrite_error",
+      if (writtenbytes == Count &&
+          my_seek(Filedes, 0, SEEK_END, MYF(0)) > 1024*1024L)
+      {
+        errno= ENOSPC;
+        writtenbytes= (size_t) -1;
+      });
+
     if (writtenbytes == Count)
       break;
     my_errno= errno;
diff -Nru mariadb-10.11.11/mysys/my_virtual_mem.c mariadb-10.11.13/mysys/my_virtual_mem.c
--- mariadb-10.11.11/mysys/my_virtual_mem.c 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/mysys/my_virtual_mem.c 2025-05-19 16:14:25.000000000 +0000
@@ -0,0 +1,201 @@
+/* Copyright (c) 2025, MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include <my_global.h>
+#include <my_sys.h>
+#include <mysys_err.h>
+#include <my_virtual_mem.h>
+#ifdef _AIX
+# include <sys/shm.h>
+#endif
+
+/*
+  Functionality for handling virtual memory
+
+  - reserve range,
+  - commit memory (within reserved range)
+  - decommit previously commited memory
+  - release range
+
+  Not every OS has a "reserve" functionality, i.e it is not always
+  possible to reserve memory larger than swap or RAM for example.
+
+  We try to respect use_large_pages setting, on Windows and Linux
+*/
+#ifdef _WIN32
+char *my_virtual_mem_reserve(size_t *size)
+{
+  DWORD flags= my_use_large_pages
+    ? MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT
+    : MEM_RESERVE;
+  char *ptr= VirtualAlloc(NULL, *size, flags, PAGE_READWRITE);
+  if (!ptr && (flags & MEM_LARGE_PAGES))
+  {
+    /* Try without large pages */
+    ptr= VirtualAlloc(NULL, *size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+    if (!ptr)
+      my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), *size);
+  }
+  return ptr;
+}
+#endif
+
+#if defined _WIN32 && !defined DBUG_OFF
+static my_bool is_memory_committed(char *ptr, size_t size)
+{
+  MEMORY_BASIC_INFORMATION mbi;
+  if (VirtualQuery(ptr, &mbi, sizeof mbi) == 0)
+    DBUG_ASSERT(0);
+  return !!(mbi.State & MEM_COMMIT);
+}
+#endif
+
+char *my_virtual_mem_commit(char *ptr, size_t size)
+{
+  DBUG_ASSERT(ptr);
+#ifdef _WIN32
+  if (my_use_large_pages)
+  {
+    DBUG_ASSERT(is_memory_committed(ptr, size));
+  }
+  else
+  {
+    void *p= VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE);
+    DBUG_ASSERT(p == ptr);
+    if (!p)
+    {
+      my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size);
+      return NULL;
+    }
+  }
+#else
+  if (my_use_large_pages)
+    /* my_large_virtual_alloc() already created a read/write mapping. */;
+  else
+  {
+# ifdef _AIX
+    /*
+      MAP_FIXED does not not work on IBM AIX in the way does works elsewhere.
+      Apparently, it is not possible to mmap(2) a range that is already in use,
+      at least not by default.
+
+      mprotect(2) is the fallback, it can't communicate out-of-memory
+      conditions, but it looks like overcommitting is not possible on
+      AIX anyway.
+    */
+    if (mprotect(ptr, size, PROT_READ | PROT_WRITE))
+    {
+      my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size);
+      return NULL;
+    }
+# else
+    void *p= 0;
+    const int flags=
+# ifdef MAP_POPULATE
+      MAP_POPULATE |
+# endif
+      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
+    p= mmap(ptr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+    if (p == MAP_FAILED)
+    {
+      my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size);
+      return NULL;
+    }
+    DBUG_ASSERT(p == ptr);
+# if defined MADV_FREE_REUSABLE && defined MADV_FREE_REUSE /* Apple macOS */
+    madvise(ptr, size, MADV_FREE_REUSE); /* cancel MADV_FREE_REUSABLE */
+# endif
+# endif
+  }
+#endif
+  update_malloc_size(size, 0);
+  return ptr;
+}
+
+void my_virtual_mem_decommit(char *ptr, size_t size)
+{
+#ifdef _WIN32
+  DBUG_ASSERT(is_memory_committed(ptr, size));
+# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
+#  error "VirtualFree(MEM_DECOMMIT) will not allow subsequent reads!"
+# endif
+  if (!my_use_large_pages)
+  {
+    if (!VirtualFree(ptr, size, MEM_DECOMMIT))
+    {
+      my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size,
+               GetLastError());
+      DBUG_ASSERT(0);
+    }
+  }
+#else
+  const int prot=
+# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT
+    /*
+      In InnoDB, buf_pool_t::page_guess() may deference pointers to
+      this, assuming that either the original contents or zeroed
+      contents is available.
+    */
+    PROT_READ
+# else
+    /* We will explicitly mark the memory unaccessible. */
+    PROT_NONE
+# endif
+    ;
+# ifdef _AIX
+  disclaim(ptr, size, DISCLAIM_ZEROMEM);
+# elif defined __linux__ || defined __osf__
+  madvise(ptr, size, MADV_DONTNEED); /* OSF/1, Linux mimicing AIX disclaim() */
+# elif defined MADV_FREE_REUSABLE && defined MADV_FREE_REUSE
+  /* Mac OS X 10.9; undocumented in Apple macOS */
+  madvise(ptr, size, MADV_FREE_REUSABLE); /* macOS mimicing AIX disclaim() */
+# elif defined MADV_PURGE /* Illumos */
+  madvise(ptr, size, MADV_PURGE); /* Illumos mimicing AIX disclaim() */
+# elif defined MADV_FREE
+  /* FreeBSD, NetBSD, OpenBSD, Dragonfly BSD, OpenSolaris, Apple macOS */
+  madvise(ptr, size, MADV_FREE); /* allow lazy zeroing out */
+# elif defined MADV_DONTNEED
+#  warning "It is unclear if madvise(MADV_DONTNEED) works as intended"
+  madvise(ptr, size, MADV_DONTNEED);
+# else
+#  warning "Do not know how to decommit memory"
+# endif
+  if (mprotect(ptr, size, prot))
+  {
+    my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno);
+    DBUG_ASSERT(0);
+  }
+#endif
+  update_malloc_size(-(longlong) size, 0);
+}
+
+void my_virtual_mem_release(char *ptr, size_t size)
+{
+#ifdef _WIN32
+  DBUG_ASSERT(my_use_large_pages || !is_memory_committed(ptr, size));
+  if (!VirtualFree(ptr, 0, MEM_RELEASE))
+  {
+    my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size,
+             GetLastError());
+    DBUG_ASSERT(0);
+  }
+#else
+  if (munmap(ptr, size))
+  {
+    my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno);
+    DBUG_ASSERT(0);
+  }
+#endif
+}
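
Taken together, the new my_virtual_mem.c entry points implement a
reserve/commit/decommit/release lifecycle: reserve a large address range once,
then commit and decommit pages inside it as demand changes. A hedged usage
sketch of how a caller such as a resizable buffer pool might drive the API
(the sizes here are illustrative, and the per-platform behaviour is only what
the implementations above actually provide):

    #include <my_virtual_mem.h>

    void buffer_pool_lifecycle_demo(void)
    {
      size_t reserved= 1024u << 20;   /* ask for 1 GiB of address space */
      size_t in_use= 128u << 20;      /* commit only 128 MiB up front */
      char *base= my_virtual_mem_reserve(&reserved);
      if (!base)
        return;                       /* reservation failed */
      if (my_virtual_mem_commit(base, in_use))
      {
        /* ... base[0 .. in_use-1] is usable, zero-filled memory ... */
        my_virtual_mem_decommit(base, in_use); /* give pages back, keep range */
      }
      my_virtual_mem_release(base, reserved);  /* unmap the whole range */
    }
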
diff -Nru mariadb-10.11.11/plugin/auth_examples/auth_0x0100.c mariadb-10.11.13/plugin/auth_examples/auth_0x0100.c
--- mariadb-10.11.11/plugin/auth_examples/auth_0x0100.c 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/auth_examples/auth_0x0100.c 2025-05-19 16:14:25.000000000 +0000
@@ -56,6 +56,10 @@
 };
 #endif
 
+/* function-type-mismatch ignore */
+#if defined(__clang__)
+__attribute__((no_sanitize("undefined")))
+#endif
 static int do_auth_0x0100(MYSQL_PLUGIN_VIO *vio, MYSQL_SERVER_AUTH_INFO *info)
 {
   info->password_used= 1;
diff -Nru mariadb-10.11.11/plugin/server_audit/server_audit.c mariadb-10.11.13/plugin/server_audit/server_audit.c
--- mariadb-10.11.11/plugin/server_audit/server_audit.c 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/server_audit/server_audit.c 2025-05-19 16:14:25.000000000 +0000
@@ -2855,6 +2855,18 @@
 {
   char *new_name= (*(char **) save) ? *(char **) save : empty_str;
 
+  if (strlen(new_name) + 4 > FN_REFLEN)
+  {
+    error_header();
+    fprintf(stderr,
+            "server_audit_file_path can't exceed %d characters.\n",
+            FN_REFLEN - 4);
+    fprintf(stderr, "Log filename remains unchanged '%s'.\n", file_path);
+    CLIENT_ERROR(1, "server_audit_file_path can't exceed %d characters.",
+                 MYF(ME_WARNING), FN_REFLEN - 4);
+    return;
+  }
+
   ADD_ATOMIC(internal_stop_logging, 1);
   error_header();
   fprintf(stderr, "Log file name was changed to '%s'.\n", new_name);
diff -Nru mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6.result mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6.result
--- mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6.result 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6.result 2025-05-19 16:14:25.000000000 +0000
@@ -2407,3 +2407,26 @@
 DROP TABLE t1;
 SET max_sort_length=DEFAULT;
 # End of 10.8 tests
+#
+# MDEV-36235 Incorrect result for BETWEEN over unique blob prefix
+#
+CREATE OR REPLACE TABLE t1 (c1 BINARY(16), UNIQUE (c1));
+INSERT INTO t1 (c1) VALUES (0x00000000000000000000000000000001);
+INSERT INTO t1 (c1) VALUES (0x00000000000000000000000000000002);
+SELECT CAST(c1 AS INET6) FROM t1 WHERE '::1' BETWEEN CAST('::1' AS INET6) AND c1;
+CAST(c1 AS INET6)
+::1
+::2
+SELECT CAST(c1 AS INET6) FROM t1 IGNORE KEY(c1) WHERE '::1' BETWEEN CAST('::1' AS INET6) AND c1;
+CAST(c1 AS INET6)
+::1
+::2
+SELECT CAST(c1 AS INET6) FROM t1 WHERE '::2' BETWEEN c1 AND CAST('::2' AS INET6);
+CAST(c1 AS INET6)
+::1
+::2
+SELECT CAST(c1 AS INET6) FROM t1 IGNORE KEY(c1) WHERE '::2' BETWEEN c1 AND CAST('::2' AS INET6);
+CAST(c1 AS INET6)
+::1
+::2
+DROP TABLE t1;
diff -Nru mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6.test mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6.test
--- mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6.test 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6.test 2025-05-19 16:14:25.000000000 +0000
@@ -1741,3 +1741,15 @@
 SET max_sort_length=DEFAULT;
 
 --echo # End of 10.8 tests
+
+--echo #
+--echo # MDEV-36235 Incorrect result for BETWEEN over unique blob prefix
+--echo #
+CREATE OR REPLACE TABLE t1 (c1 BINARY(16), UNIQUE (c1));
+INSERT INTO t1 (c1) VALUES (0x00000000000000000000000000000001);
+INSERT INTO t1 (c1) VALUES (0x00000000000000000000000000000002);
+SELECT CAST(c1 AS INET6) FROM t1 WHERE '::1' BETWEEN CAST('::1' AS INET6) AND c1;
+SELECT CAST(c1 AS INET6) FROM t1 IGNORE KEY(c1) WHERE '::1' BETWEEN CAST('::1' AS INET6) AND c1;
+SELECT CAST(c1 AS INET6) FROM t1 WHERE '::2' BETWEEN c1 AND CAST('::2' AS INET6);
+SELECT CAST(c1 AS INET6) FROM t1 IGNORE KEY(c1) WHERE '::2' BETWEEN c1 AND CAST('::2' AS INET6);
+DROP TABLE t1;
diff -Nru mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_engines.inc mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_engines.inc
--- mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_engines.inc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_engines.inc 2025-05-19 16:14:25.000000000 +0000
@@ -36,3 +36,16 @@
 EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=CAST('::ff' AS INET6);
 
 DROP TABLE t1;
+
+--echo #
+--echo # MDEV-34922: Assertion `value.length() == FbtImpl::binary_length()' failed in
+--echo # Type_handler_fbt::Field_fbt::store_native,
+--echo # Assertion `item->null_value' failed in Type_handler::Item_send_str
+--echo #
+
+CREATE TABLE t1 (a datetime);
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM (SELECT cast('::' AS INET6),min(1) FROM t1 WHERE if(uuid_short(), a,1)) dt;
+DROP TABLE t1;
+
+--echo # End of 10.5 tests
diff -Nru mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result
--- mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_innodb.result 2025-05-19 16:14:25.000000000 +0000
@@ -88,6 +88,18 @@
 Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = INET6'::ff'
 DROP TABLE t1;
 #
+# MDEV-34922: Assertion `value.length() == FbtImpl::binary_length()' failed in
+# Type_handler_fbt::Field_fbt::store_native,
+# Assertion `item->null_value' failed in Type_handler::Item_send_str
+#
+CREATE TABLE t1 (a datetime);
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM (SELECT cast('::' AS INET6),min(1) FROM t1 WHERE if(uuid_short(), a,1)) dt;
+cast('::' AS INET6) min(1)
+:: NULL
+DROP TABLE t1;
+# End of 10.5 tests
+#
 # MDEV-26742 Assertion `field->type_handler() == this' failed in FixedBinTypeBundle::Type_handler_fbt::stored_field_cmp_to_item
 #
 CREATE TABLE t1 (pk inet6, c text) engine=myisam;
diff -Nru mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_memory.result mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_memory.result
--- mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_memory.result 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_memory.result 2025-05-19 16:14:25.000000000 +0000
@@ -155,5 +155,17 @@
 Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = INET6'::ff'
 DROP TABLE t1;
 #
+# MDEV-34922: Assertion `value.length() == FbtImpl::binary_length()' failed in
+# Type_handler_fbt::Field_fbt::store_native,
+# Assertion `item->null_value' failed in Type_handler::Item_send_str
+#
+CREATE TABLE t1 (a datetime);
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM (SELECT cast('::' AS INET6),min(1) FROM t1 WHERE if(uuid_short(), a,1)) dt;
+cast('::' AS INET6) min(1)
+:: NULL
+DROP TABLE t1;
+# End of 10.5 tests
+#
 # End of 10.5 tests
 #
diff -Nru mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result
--- mariadb-10.11.11/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/type_inet/mysql-test/type_inet/type_inet6_myisam.result 2025-05-19 16:14:25.000000000 +0000
@@ -88,6 +88,18 @@
 Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = INET6'::ff'
 DROP TABLE t1;
 #
+# MDEV-34922: Assertion `value.length() == FbtImpl::binary_length()' failed in
+# Type_handler_fbt::Field_fbt::store_native,
+# Assertion `item->null_value' failed in Type_handler::Item_send_str
+#
+CREATE TABLE t1 (a datetime);
+INSERT INTO t1 VALUES (NULL);
+SELECT * FROM (SELECT cast('::' AS INET6),min(1) FROM t1 WHERE if(uuid_short(), a,1)) dt;
+cast('::' AS INET6) min(1)
+:: NULL
+DROP TABLE t1;
+# End of 10.5 tests
+#
 # MDEV-26742 Assertion `field->type_handler() == this' failed in FixedBinTypeBundle::Type_handler_fbt::stored_field_cmp_to_item
 #
 CREATE TABLE t1 (c varchar(64), key(c)) engine=myisam;
diff -Nru mariadb-10.11.11/plugin/userstat/client_stats.cc mariadb-10.11.13/plugin/userstat/client_stats.cc
--- mariadb-10.11.11/plugin/userstat/client_stats.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/userstat/client_stats.cc 2025-05-19 16:14:25.000000000 +0000
@@ -45,8 +45,8 @@
   table->field[j++]->store((longlong)user_stats->total_connections,TRUE);
   table->field[j++]->store((longlong)user_stats->concurrent_connections, TRUE);
   table->field[j++]->store((longlong)user_stats->connected_time, TRUE);
-  table->field[j++]->store((double)user_stats->busy_time);
-  table->field[j++]->store((double)user_stats->cpu_time);
+  table->field[j++]->store((double)user_stats->busy_time/1e6);
+  table->field[j++]->store((double)user_stats->cpu_time/1e6);
   table->field[j++]->store((longlong)user_stats->bytes_received, TRUE);
   table->field[j++]->store((longlong)user_stats->bytes_sent, TRUE);
   table->field[j++]->store((longlong)user_stats->binlog_bytes_written, TRUE);
diff -Nru mariadb-10.11.11/plugin/versioning/versioning.cc mariadb-10.11.13/plugin/versioning/versioning.cc
--- mariadb-10.11.11/plugin/versioning/versioning.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/plugin/versioning/versioning.cc 2025-05-19 16:14:25.000000000 +0000
@@ -150,7 +150,6 @@
   { { C_STRING_WITH_LEN("TRT_TRX_ID") }, BUILDER(Create_func_trt)},
   { { C_STRING_WITH_LEN("TRT_TRX_SEES") }, BUILDER(Create_func_trt_trx_sees)},
   { { C_STRING_WITH_LEN("TRT_TRX_SEES_EQ") }, BUILDER(Create_func_trt_trx_sees)},
-  { {0, 0}, NULL}
 };
 
diff -Nru mariadb-10.11.11/scripts/mysqlhotcopy.sh mariadb-10.11.13/scripts/mysqlhotcopy.sh
--- mariadb-10.11.11/scripts/mysqlhotcopy.sh 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/scripts/mysqlhotcopy.sh 2025-05-19 16:14:25.000000000 +0000
@@ -208,7 +208,7 @@
     else
     {
         $dsn .= "host=" . $opt{host};
-        if ($opt{host} ne "localhost")
+        if ($opt{host} ne "localhost" and $opt{port})
         {
             $dsn .= ";port=". $opt{port};
         }
diff -Nru mariadb-10.11.11/scripts/wsrep_sst_common.sh mariadb-10.11.13/scripts/wsrep_sst_common.sh
--- mariadb-10.11.11/scripts/wsrep_sst_common.sh 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/scripts/wsrep_sst_common.sh 2025-05-19 16:14:25.000000000 +0000
@@ -1910,4 +1910,17 @@
 
 SST_PID="$DATA/wsrep_sst.pid"
 
+if [ -n "${MTR_SST_JOINER_DELAY:-}" ]; then
+    MTR_SST_JOINER_DELAY=$(trim_string "$MTR_SST_JOINER_DELAY")
+fi
+
+simulate_long_sst()
+{
+    # Delay for MTR tests if needed to simulate long SST/IST:
+    if [ ${MTR_SST_JOINER_DELAY:-0} -gt 0 ]; then
+        wsrep_log_info "Sleeping $MTR_SST_JOINER_DELAY seconds for MTR test"
+        sleep $MTR_SST_JOINER_DELAY
+    fi
+}
+
 wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE started on $WSREP_SST_OPT_ROLE"
diff -Nru mariadb-10.11.11/scripts/wsrep_sst_mariabackup.sh mariadb-10.11.13/scripts/wsrep_sst_mariabackup.sh
--- mariadb-10.11.11/scripts/wsrep_sst_mariabackup.sh 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/scripts/wsrep_sst_mariabackup.sh 2025-05-19 16:14:25.000000000 +0000
@@ -1513,6 +1513,8 @@
         exit 2
     fi
 
+    simulate_long_sst
+
     # use donor magic file, if present
     # if IST was used, donor magic file was not created
     # Remove special tags from the magic file, and from the output:
diff -Nru mariadb-10.11.11/scripts/wsrep_sst_mysqldump.sh mariadb-10.11.13/scripts/wsrep_sst_mysqldump.sh
--- mariadb-10.11.11/scripts/wsrep_sst_mysqldump.sh 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/scripts/wsrep_sst_mysqldump.sh 2025-05-19 16:14:25.000000000 +0000
@@ -184,5 +184,9 @@
     echo "$SET_START_POSITION" | $MYSQL || exit $?
 fi
 
+if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
+    simulate_long_sst
+fi
+
 wsrep_log_info "$WSREP_METHOD $WSREP_TRANSFER_TYPE completed on $WSREP_SST_OPT_ROLE"
 exit 0
diff -Nru mariadb-10.11.11/scripts/wsrep_sst_rsync.sh mariadb-10.11.13/scripts/wsrep_sst_rsync.sh
--- mariadb-10.11.11/scripts/wsrep_sst_rsync.sh 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/scripts/wsrep_sst_rsync.sh 2025-05-19 16:14:25.000000000 +0000
@@ -915,6 +915,8 @@
         fi
     fi
 
+    simulate_long_sst
+
     # Remove special tags from the magic file, and from the output:
     coords=$(head -n1 "$MAGIC_FILE")
     wsrep_log_info "Galera co-ords from recovery: $coords"
diff -Nru mariadb-10.11.11/sql/filesort.cc mariadb-10.11.13/sql/filesort.cc
--- mariadb-10.11.11/sql/filesort.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/filesort.cc 2025-05-19 16:14:25.000000000 +0000
@@ -640,26 +640,16 @@
 }
 
 #ifndef DBUG_OFF
-/*
-  Print table's current row into a buffer and return a pointer to it.
-  This is intended to be used from gdb:
-
-    (gdb) p dbug_print_table_row(table)
-      $33 = "SUBQUERY2_t1(col_int_key,col_varchar_nokey)=(7,c)"
-    (gdb)
+static char dbug_row_print_buf[4096];
 
-  Only columns in table->read_set are printed
-*/
-const char* dbug_print_row(TABLE *table, const uchar *rec, bool print_names)
+String dbug_format_row(TABLE *table, const uchar *rec, bool print_names)
 {
   Field **pfield;
-  const size_t alloc_size= 512;
-  char *row_buff= (char *) alloc_root(&table->mem_root, alloc_size);
-  char *row_buff_tmp= (char *) alloc_root(&table->mem_root, alloc_size);
-  String tmp(row_buff_tmp, alloc_size, &my_charset_bin);
-  String output(row_buff, alloc_size, &my_charset_bin);
+  char row_buff_tmp[512];
+  String tmp(row_buff_tmp, sizeof(row_buff_tmp), &my_charset_bin);
+  String output(dbug_row_print_buf, sizeof(dbug_row_print_buf), &my_charset_bin);
 
   auto move_back_lambda= [table, rec]() mutable
   {
     table->move_fields(table->field, table->record[0], rec);
@@ -672,7 +662,7 @@
     move_back_guard.engage();
   }
 
-  SCOPE_VALUE(table->read_set, (table->read_set && table->write_set) ?
+  SCOPE_VALUE(table->read_set, (table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE) ?
                                table->write_set : table->read_set);
 
   output.length(0);
@@ -724,10 +714,35 @@
   }
   output.append(')');
 
-  return output.c_ptr_safe();
+  return output;
 }
 
+/**
+  A function to display a row in debugger.
+
+  Example usage:
+  (gdb) p dbug_print_row(table, table->record[1])
+*/
+const char *dbug_print_row(TABLE *table, const uchar *rec)
+{
+  String row= dbug_format_row(table, table->record[0]);
+  if (row.length() > sizeof dbug_row_print_buf - 1)
+    return "Couldn't fit into buffer";
+  memcpy(dbug_row_print_buf, row.c_ptr(), row.length());
+  return dbug_row_print_buf;
+}
 
+/**
+  Print table's current row into a buffer and return a pointer to it.
+
+  This is intended to be used from gdb:
+
+    (gdb) p dbug_print_table_row(table)
+      $33 = "SUBQUERY2_t1(col_int_key,col_varchar_nokey)=(7,c)"
+    (gdb)
+
+  Only columns in table->read_set are printed
+*/
 const char* dbug_print_table_row(TABLE *table)
 {
   return dbug_print_row(table, table->record[0]);
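
The filesort.cc rework above splits row printing into dbug_format_row(),
which returns a String, and dbug_print_row(), which copies the result into a
file-scope static buffer instead of allocating from the table's MEM_ROOT.
That keeps the helper safely callable from a debugger, where evaluating an
allocation is fragile. A miniature of the same pattern with made-up types
(illustrative, not from the patch):

    #include <stdio.h>

    struct point { int x, y; };

    /* Debugger-friendly formatter: `p dump_point(&pt)' works from gdb
       because the result lives in a static buffer, at the cost of not
       being thread-safe or reentrant. */
    const char *dump_point(const struct point *p)
    {
      static char buf[64];
      snprintf(buf, sizeof buf, "point(%d,%d)", p->x, p->y);
      return buf;
    }
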
diff -Nru mariadb-10.11.11/sql/ha_partition.cc mariadb-10.11.13/sql/ha_partition.cc
--- mariadb-10.11.11/sql/ha_partition.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/ha_partition.cc 2025-05-19 16:14:25.000000000 +0000
@@ -2141,7 +2141,9 @@
     m_added_file[i]->extra(HA_EXTRA_BEGIN_ALTER_COPY);
   error= copy_partitions(copied, deleted);
   for (i= 0; i < part_count; i++)
-    m_added_file[i]->extra(HA_EXTRA_END_ALTER_COPY);
+    m_added_file[i]->extra(error
+                           ? HA_EXTRA_ABORT_ALTER_COPY
+                           : HA_EXTRA_END_ALTER_COPY);
   if (unlikely(error))
   {
     /*
@@ -4404,31 +4406,19 @@
   DBUG_ENTER("ha_partition::store_lock");
   DBUG_ASSERT(thd == current_thd);
 
-  /*
-    This can be called from get_lock_data() in mysql_lock_abort_for_thread(),
-    even when thd != table->in_use. In that case don't use partition pruning,
-    but use all partitions instead to avoid using another threads structures.
-  */
-  if (thd != table->in_use)
+  MY_BITMAP *used_partitions= lock_type == TL_UNLOCK ||
+                              lock_type == TL_IGNORE ?
+                              &m_locked_partitions :
+                              &m_part_info->lock_partitions;
+
+  for (i= bitmap_get_first_set(used_partitions);
+       i < m_tot_parts;
+       i= bitmap_get_next_set(used_partitions, i))
   {
-    for (i= 0; i < m_tot_parts; i++)
-      to= m_file[i]->store_lock(thd, to, lock_type);
+    DBUG_PRINT("info", ("store lock %u iteration", i));
+    to= m_file[i]->store_lock(thd, to, lock_type);
   }
-  else
-  {
-    MY_BITMAP *used_partitions= lock_type == TL_UNLOCK ||
-                                lock_type == TL_IGNORE ?
-                                &m_locked_partitions :
-                                &m_part_info->lock_partitions;
-    for (i= bitmap_get_first_set(used_partitions);
-         i < m_tot_parts;
-         i= bitmap_get_next_set(used_partitions, i))
-    {
-      DBUG_PRINT("info", ("store lock %u iteration", i));
-      to= m_file[i]->store_lock(thd, to, lock_type);
-    }
-  }
 
   DBUG_RETURN(to);
 }
@@ -4755,7 +4745,6 @@
   }
 
-  m_last_part= new_part_id;
   start_part_bulk_insert(thd, new_part_id);
   DBUG_ASSERT(!m_file[new_part_id]->row_logging);
   if (new_part_id == old_part_id)
@@ -4790,6 +4779,8 @@
     goto exit;
   }
 
+  m_last_part= new_part_id;
+
 exit:
   /*
     if updating an auto_increment column, update
@@ -9478,6 +9469,7 @@
   case HA_EXTRA_STARTING_ORDERED_INDEX_SCAN:
   case HA_EXTRA_BEGIN_ALTER_COPY:
   case HA_EXTRA_END_ALTER_COPY:
+  case HA_EXTRA_ABORT_ALTER_COPY:
     DBUG_RETURN(loop_partitions(extra_cb, &operation));
   default:
   {
diff -Nru mariadb-10.11.11/sql/ha_sequence.cc mariadb-10.11.13/sql/ha_sequence.cc
--- mariadb-10.11.11/sql/ha_sequence.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/ha_sequence.cc 2025-05-19 16:14:25.000000000 +0000
@@ -353,6 +353,12 @@
   return(COMPATIBLE_DATA_YES);
 }
 
+enum_alter_inplace_result
+ha_sequence::check_if_supported_inplace_alter(TABLE *altered_table,
+                                              Alter_inplace_info *ai)
+{
+  return file->check_if_supported_inplace_alter(altered_table, ai);
+}
 
 int ha_sequence::external_lock(THD *thd, int lock_type)
 {
diff -Nru mariadb-10.11.11/sql/ha_sequence.h mariadb-10.11.13/sql/ha_sequence.h
--- mariadb-10.11.11/sql/ha_sequence.h 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/ha_sequence.h 2025-05-19 16:14:25.000000000 +0000
@@ -94,6 +94,9 @@
   /* For ALTER ONLINE TABLE */
   bool check_if_incompatible_data(HA_CREATE_INFO *create_info,
                                   uint table_changes) override;
+  enum_alter_inplace_result
+  check_if_supported_inplace_alter(TABLE *altered_table,
+                                   Alter_inplace_info *ai) override;
   void write_lock() { write_locked= 1;}
   void unlock() { write_locked= 0; }
   bool is_locked() { return write_locked; }
diff -Nru mariadb-10.11.11/sql/handle_connections_win.cc mariadb-10.11.13/sql/handle_connections_win.cc
--- mariadb-10.11.11/sql/handle_connections_win.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/handle_connections_win.cc 2025-05-19 16:14:25.000000000 +0000
@@ -595,11 +595,8 @@
 
 void handle_connections_win()
 {
-  int n_waits;
-
   create_shutdown_event();
   wait_events.push_back(hEventShutdown);
-  n_waits= 1;
 
   for (size_t i= 0; i < all_listeners.size(); i++)
   {
diff -Nru mariadb-10.11.11/sql/handler.cc mariadb-10.11.13/sql/handler.cc
--- mariadb-10.11.11/sql/handler.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/handler.cc 2025-05-19 16:14:25.000000000 +0000
@@ -499,7 +499,7 @@
   SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
   SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
   SETMSG(HA_FTS_INVALID_DOCID, "Invalid InnoDB FTS Doc ID");
-  SETMSG(HA_ERR_DISK_FULL, ER_DEFAULT(ER_DISK_FULL));
+  SETMSG(HA_ERR_DISK_FULL, "Disk got full writing '%s'");
   SETMSG(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE, "Too many words in a FTS phrase or proximity search");
   SETMSG(HA_ERR_FK_DEPTH_EXCEEDED, "Foreign key cascade delete/update exceeds");
   SETMSG(HA_ERR_TABLESPACE_MISSING, ER_DEFAULT(ER_TABLESPACE_MISSING));
@@ -672,6 +672,8 @@
 
   DBUG_EXECUTE_IF("unstable_db_type", {
     static int i= (int) DB_TYPE_FIRST_DYNAMIC;
+    while (installed_htons[i])
+      i++;
     hton->db_type= (enum legacy_db_type)++i;
   });
 
@@ -1899,6 +1901,8 @@
   }
 #endif /* WITH_WSREP */
   error= ha_commit_one_phase(thd, all);
+  if (error)
+    goto err;
 #ifdef WITH_WSREP
   // Here in case of error we must return 2 for inconsistency
   if (run_wsrep_hooks && !error)
@@ -2139,7 +2143,7 @@
 
   if (ha_info)
   {
-    int err;
+    int err= 0;
     if (has_binlog_hton(ha_info) &&
         (err= binlog_commit(thd, all,
@@ -2147,6 +2151,8 @@
     {
       my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
      error= 1;
+
+      goto err;
     }
     for (; ha_info; ha_info= ha_info_next)
     {
@@ -2182,7 +2188,7 @@
     if (count >= 2)
       statistic_increment(transactions_multi_engine, LOCK_status);
   }
-
+ err:
   DBUG_RETURN(error);
 }
@@ -2291,7 +2297,7 @@
                 "conf %d wsrep_err %s SQL %s", thd->thread_id,
                 thd->query_id, thd->wsrep_trx().state(),
                 wsrep::to_c_string(thd->wsrep_cs().current_error()),
-                thd->query());
+                wsrep_thd_query(thd));
   }
 #endif /* WITH_WSREP */
 }
@@ -2307,7 +2313,7 @@
   if (WSREP(thd) && thd->is_error())
   {
     WSREP_DEBUG("ha_rollback_trans(%lld, %s) rolled back: msg %s is_real %d wsrep_err %s",
-                thd->thread_id, all? "TRUE" : "FALSE",
+                thd->thread_id, all ? "TRUE" : "FALSE",
                 thd->get_stmt_da()->message(), is_real_trans,
                 wsrep::to_c_string(thd->wsrep_cs().current_error()));
   }
@@ -2800,6 +2806,7 @@
     }
     if (IF_WSREP((wsrep_emulate_bin_log &&
                   wsrep_is_wsrep_xid(info->list + i) &&
+                  !wsrep_is_xid_gtid_undefined(info->list + i) &&
                   x <= wsrep_limit), false) ||
         tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
     {
@@ -4455,8 +4462,12 @@
     break;
   case ENOSPC:
  case HA_ERR_DISK_FULL:
-    textno= ER_DISK_FULL;
     SET_FATAL_ERROR;			// Ensure error is logged
+    my_printf_error(ER_DISK_FULL, "Disk got full writing '%s.%s' (Errcode: %M)",
+                    MYF(errflag | ME_ERROR_LOG),
+                    table_share->db.str, table_share->table_name.str,
+                    error);
+    DBUG_VOID_RETURN;
     break;
   case HA_ERR_KEY_NOT_FOUND:
   case HA_ERR_NO_ACTIVE_RECORD:
@@ -7718,7 +7729,10 @@
   });
 #endif /* WITH_WSREP */
   if ((error= ha_check_overlaps(NULL, buf)))
+  {
+    DEBUG_SYNC_C("ha_write_row_end");
     DBUG_RETURN(error);
+  }
 
   /*
     NOTE: this != table->file is true in 3 cases:
@@ -7739,6 +7753,7 @@
       if (table->next_number_field && buf == table->record[0])
         if (int err= update_auto_increment())
           error= err;
+      DEBUG_SYNC_C("ha_write_row_end");
       DBUG_RETURN(error);
     }
   }
@@ -7749,7 +7764,8 @@
   TABLE_IO_WAIT(tracker, PSI_TABLE_WRITE_ROW, MAX_KEY, error,
     { error= write_row(buf); })
 
-  DBUG_PRINT("dml", ("INSERT: %s = %d", dbug_print_row(table, buf, false), error));
+  DBUG_PRINT("dml", ("INSERT: %s = %d",
+                     dbug_format_row(table, buf, false).c_ptr_safe(), error));
   MYSQL_INSERT_ROW_DONE(error);
   if (likely(!error))
   {
@@ -7760,14 +7776,12 @@
     Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
     error= binlog_log_row(table, 0, buf, log_func);
   }
+
 #ifdef WITH_WSREP
-  if (WSREP_NNULL(ha_thd()) && table_share->tmp_table == NO_TMP_TABLE &&
-      ht->flags & HTON_WSREP_REPLICATION &&
-      !error && (error= wsrep_after_row(ha_thd())))
-  {
-    DEBUG_SYNC_C("ha_write_row_end");
-    DBUG_RETURN(error);
-  }
+  THD *thd= ha_thd();
+  if (WSREP_NNULL(thd) && table_share->tmp_table == NO_TMP_TABLE &&
+      ht->flags & HTON_WSREP_REPLICATION && !error)
+    error= wsrep_after_row(thd);
 #endif /* WITH_WSREP */
   }
@@ -7811,8 +7825,10 @@
   TABLE_IO_WAIT(tracker, PSI_TABLE_UPDATE_ROW, active_index, 0,
     { error= update_row(old_data, new_data);})
 
-  DBUG_PRINT("dml", ("UPDATE: %s => %s = %d", dbug_print_row(table, old_data, false),
-                     dbug_print_row(table, new_data, false), error));
+  DBUG_PRINT("dml", ("UPDATE: %s => %s = %d",
+                     dbug_format_row(table, old_data, false).c_ptr_safe(),
+                     dbug_format_row(table, new_data, false).c_ptr_safe(),
+                     error));
   MYSQL_UPDATE_ROW_DONE(error);
   if (likely(!error))
   {
@@ -7892,7 +7908,8 @@
   TABLE_IO_WAIT(tracker, PSI_TABLE_DELETE_ROW, active_index, error,
     { error= delete_row(buf);})
-  DBUG_PRINT("dml", ("DELETE: %s = %d", dbug_print_row(table, buf, false), error));
+  DBUG_PRINT("dml", ("DELETE: %s = %d",
+                     dbug_format_row(table, buf, false).c_ptr_safe(), error));
   MYSQL_DELETE_ROW_DONE(error);
   if (likely(!error))
   {
@@ -8236,16 +8253,6 @@
   VERSIONING functions
 ******************************************************************************/
 
-bool Vers_parse_info::is_start(const char *name) const
-{
-  DBUG_ASSERT(name);
-  return as_row.start && as_row.start.streq(name);
-}
-bool Vers_parse_info::is_end(const char *name) const
-{
-  DBUG_ASSERT(name);
-  return as_row.end && as_row.end.streq(name);
-}
 bool Vers_parse_info::is_start(const Create_field &f) const
 {
   return f.flags & VERS_ROW_START;
@@ -8300,8 +8307,8 @@
   return false;
 }
 
-const Lex_ident Vers_parse_info::default_start= "row_start";
-const Lex_ident Vers_parse_info::default_end= "row_end";
+const Lex_ident Vers_parse_info::default_start= { STRING_WITH_LEN("row_start")};
+const Lex_ident Vers_parse_info::default_end= { STRING_WITH_LEN("row_end")};
 
 bool Vers_parse_info::fix_implicit(THD *thd, Alter_info *alter_info)
 {
@@ -8560,7 +8567,7 @@
 
   if (alter_info->flags & ALTER_ADD_SYSTEM_VERSIONING)
   {
-    if (check_sys_fields(table_name, share->db, alter_info))
+    if (check_sys_fields(share->table_name, share->db, alter_info))
       return true;
   }
 
@@ -8866,8 +8873,8 @@
     }
   }
 
-  bool res= period_info.check_field(row_start, period.start.str)
-         || period_info.check_field(row_end, period.end.str);
+  bool res= period_info.check_field(row_start, period.start)
+         || period_info.check_field(row_end, period.end);
   if (res)
     return true;
 
diff -Nru mariadb-10.11.11/sql/handler.h mariadb-10.11.13/sql/handler.h
--- mariadb-10.11.11/sql/handler.h 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/handler.h 2025-05-19 16:14:25.000000000 +0000
@@ -2117,8 +2117,6 @@
   }
 
 protected:
-  bool is_start(const char *name) const;
-  bool is_end(const char *name) const;
   bool is_start(const Create_field &f) const;
   bool is_end(const Create_field &f) const;
   bool fix_implicit(THD *thd, Alter_info *alter_info);
@@ -5444,6 +5442,6 @@
                     bool versioned);
 
 #ifndef DBUG_OFF
-const char* dbug_print_row(TABLE *table, const uchar *rec, bool print_names= true);
+String dbug_format_row(TABLE *table, const uchar *rec, bool print_names= true);
 #endif /* DBUG_OFF */
 
 #endif /* HANDLER_INCLUDED */
diff -Nru mariadb-10.11.11/sql/item.cc mariadb-10.11.13/sql/item.cc
--- mariadb-10.11.11/sql/item.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item.cc 2025-05-19 16:14:25.000000000 +0000
@@ -5321,6 +5321,7 @@
 
 double Item_copy_string::val_real()
 {
+  DBUG_ASSERT(copied_in);
   int err_not_used;
   char *end_not_used;
   return (null_value ? 0.0 :
@@ -5331,6 +5332,7 @@
 
 longlong Item_copy_string::val_int()
 {
+  DBUG_ASSERT(copied_in);
   int err;
   return null_value ? 0 :
    str_value.charset()->strntoll(str_value.ptr(), str_value.length(), 10,
@@ -5340,6 +5342,7 @@
 
 int Item_copy_string::save_in_field(Field *field, bool no_conversions)
 {
+  DBUG_ASSERT(copied_in);
   return save_str_value_in_field(field, &str_value);
 }
 
@@ -5350,11 +5353,15 @@
   if (res && res != &str_value)
     str_value.copy(*res);
   null_value=item->null_value;
+#ifndef DBUG_OFF
+  copied_in= 1;
+#endif
 }
 
 /* ARGSUSED */
 String *Item_copy_string::val_str(String *str)
 {
+  DBUG_ASSERT(copied_in);
   // Item_copy_string is used without fix_fields call
   if (null_value)
     return (String*) 0;
@@ -5364,6 +5371,7 @@
 
 my_decimal *Item_copy_string::val_decimal(my_decimal *decimal_value)
 {
+  DBUG_ASSERT(copied_in);
   // Item_copy_string is used without fix_fields call
   if (null_value)
     return (my_decimal *) 0;
@@ -11067,8 +11075,8 @@
 {}
 
 /**
-  Wrapper of hide_view_error call for Name_resolution_context error
-  processor.
+  Wrapper of replace_view_error_with_generic call for Name_resolution_context
+  error processor.
 
   @note
   hide view underlying tables details in error messages
 
@@ -11076,7 +11084,7 @@
 void view_error_processor(THD *thd, void *data)
 {
-  ((TABLE_LIST *)data)->hide_view_error(thd);
+  ((TABLE_LIST *)data)->replace_view_error_with_generic(thd);
 }
 
diff -Nru mariadb-10.11.11/sql/item.h mariadb-10.11.13/sql/item.h
--- mariadb-10.11.11/sql/item.h 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item.h 2025-05-19 16:14:25.000000000 +0000
@@ -757,6 +757,17 @@
   virtual const String *const_ptr_string() const { return NULL; }
 };
 
+struct subselect_table_finder_param
+{
+  THD *thd;
+  /*
+    We're searching for different TABLE_LIST objects referring to the same
+    table as this one
+  */
+  const TABLE_LIST *find;
+  /* NUL - not found, ERROR_TABLE - search error, or the found table reference */
+  TABLE_LIST *dup;
+};
 
 /****************************************************************************/
 
@@ -1954,6 +1965,19 @@
   */
   virtual Item *clone_item(THD *thd) const { return nullptr; }
 
+  /*
+    @detail
+    The meaning of this function seems to be:
+    Check what the item would return if it was provided with two identical
+    non-NULL arguments.
+    It is not clear why it is defined for generic class Item or what its other
+    uses are.
+
+    @return
+    COND_TRUE   Would return true
+    COND_FALSE  Would return false
+    COND_OK     May return either, depending on the argument type.
+  */
   virtual cond_result eq_cmp_result() const { return COND_OK; }
   inline uint float_length(uint decimals_par) const
   { return decimals < FLOATING_POINT_DECIMALS ? (DBL_DIG+2+decimals_par) : DBL_DIG+8;}
@@ -2292,6 +2316,7 @@
     set_extraction_flag(*(int16*)arg);
     return 0;
   }
+  virtual bool subselect_table_finder_processor(void *arg) { return 0; };
 
   /*
     TRUE if the expression depends only on the table indicated by tab_map
@@ -6673,8 +6698,15 @@
     Type_std_attributes::set(item);
     name= item->name;
     set_handler(item->type_handler());
+#ifndef DBUG_OFF
+    copied_in= 0;
+#endif
   }
 
+#ifndef DBUG_OFF
+  bool copied_in;
+#endif
+
 public:
@@ -6740,7 +6772,10 @@
   double val_real() override;
   longlong val_int() override;
   bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate) override
-  { return get_date_from_string(thd, ltime, fuzzydate); }
+  {
+    DBUG_ASSERT(copied_in);
+    return get_date_from_string(thd, ltime, fuzzydate);
+  }
   void copy() override;
   int save_in_field(Field *field, bool no_conversions) override;
   Item *do_get_copy(THD *thd) const override
@@ -6770,9 +6805,13 @@
     null_value= tmp.is_null();
     m_value= tmp.is_null() ? Timestamp_or_zero_datetime() :
                              Timestamp_or_zero_datetime(tmp);
+#ifndef DBUG_OFF
+    copied_in=1;
+#endif
   }
   int save_in_field(Field *field, bool) override
   {
+    DBUG_ASSERT(copied_in);
    DBUG_ASSERT(sane());
     if (null_value)
       return set_field_to_null(field);
@@ -6781,30 +6820,35 @@
   }
   longlong val_int() override
   {
+    DBUG_ASSERT(copied_in);
     DBUG_ASSERT(sane());
     return null_value ? 0 :
            m_value.to_datetime(current_thd).to_longlong();
   }
   double val_real() override
   {
+    DBUG_ASSERT(copied_in);
     DBUG_ASSERT(sane());
     return null_value ? 0e0 :
            m_value.to_datetime(current_thd).to_double();
   }
   String *val_str(String *to) override
   {
+    DBUG_ASSERT(copied_in);
     DBUG_ASSERT(sane());
     return null_value ? NULL :
           m_value.to_datetime(current_thd).to_string(to, decimals);
   }
   my_decimal *val_decimal(my_decimal *to) override
   {
+    DBUG_ASSERT(copied_in);
     DBUG_ASSERT(sane());
     return null_value ? NULL :
           m_value.to_datetime(current_thd).to_decimal(to);
   }
   bool get_date(THD *thd, MYSQL_TIME *ltime,
                 date_mode_t fuzzydate) override
   {
+    DBUG_ASSERT(copied_in);
     DBUG_ASSERT(sane());
     bool res= m_value.to_TIME(thd, ltime, fuzzydate);
     DBUG_ASSERT(!res);
@@ -6812,6 +6856,7 @@
   }
   bool val_native(THD *thd, Native *to) override
   {
+    DBUG_ASSERT(copied_in);
     DBUG_ASSERT(sane());
     return null_value || m_value.to_native(to, decimals);
   }
diff -Nru mariadb-10.11.11/sql/item_cmpfunc.h mariadb-10.11.13/sql/item_cmpfunc.h
--- mariadb-10.11.11/sql/item_cmpfunc.h 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item_cmpfunc.h 2025-05-19 16:14:25.000000000 +0000
@@ -1003,6 +1003,23 @@
 
 class Item_func_between :public Item_func_opt_neg
 {
+  /*
+    If the types of the arguments to BETWEEN permit, then:
+
+    WHERE const1 BETWEEN expr2 AND field1
+    can be optimized as if it was just:
+    WHERE const1 <= field1
+
+    as expr2 could be an arbitrary expression. More generally,
+    this optimization is permitted if aggregation for comparison
+    for three expressions (const1,const2,field1) and for two
+    expressions (const1,field1) return the same type handler.
+
+    @param [IN] field_item - This is a field from the right side
+                             of the BETWEEN operator.
+  */
+  bool can_optimize_range_const(Item_field *field_item) const;
+
 protected:
   SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param,
                              Field *field, Item *value) override;
@@ -2945,9 +2962,18 @@
       TODO:
       We could still replace "expr1" to "const"
       in "expr1 LIKE expr2" in case of a "PAD SPACE" collation,
      but only if "expr2" has '%'
-      at the end. 
+      at the end.
     */
-    return compare_collation() == &my_charset_bin ? COND_TRUE : COND_OK;
+    if (compare_collation() == &my_charset_bin)
+    {
+      /*
+        'foo' NOT LIKE 'foo' is false,
+        'foo' LIKE 'foo' is true.
+      */
+      return negated? COND_FALSE : COND_TRUE;
+    }
+
+    return COND_OK;
   }
   void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level,
                       table_map usable_tables, SARGABLE_PARAM **sargables)
diff -Nru mariadb-10.11.11/sql/item_func.cc mariadb-10.11.13/sql/item_func.cc
--- mariadb-10.11.11/sql/item_func.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item_func.cc 2025-05-19 16:14:25.000000000 +0000
@@ -7068,6 +7068,16 @@
 /*****************************************************************************
   SEQUENCE functions
 *****************************************************************************/
+bool Item_func_nextval::check_access_and_fix_fields(THD *thd, Item **ref,
+                                                    privilege_t want_access)
+{
+  table_list->sequence= false;
+  bool error= check_single_table_access(thd, want_access, table_list, false);
+  table_list->sequence= true;
+  if (error && table_list->belong_to_view)
+    table_list->replace_view_error_with_generic(thd);
+  return error || Item_longlong_func::fix_fields(thd, ref);
+}
 
 longlong Item_func_nextval::val_int()
 {
diff -Nru mariadb-10.11.11/sql/item_func.h mariadb-10.11.13/sql/item_func.h
--- mariadb-10.11.11/sql/item_func.h 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item_func.h 2025-05-19 16:14:25.000000000 +0000
@@ -4234,6 +4234,7 @@
 protected:
   TABLE_LIST *table_list;
   TABLE *table;
+  bool check_access_and_fix_fields(THD *, Item **ref, privilege_t);
 public:
   Item_func_nextval(THD *thd, TABLE_LIST *table_list_arg):
     Item_longlong_func(thd), table_list(table_list_arg) {}
@@ -4243,6 +4244,8 @@
     static LEX_CSTRING name= {STRING_WITH_LEN("nextval") };
     return name;
   }
+  bool fix_fields(THD *thd, Item **ref) override
+  { return check_access_and_fix_fields(thd, ref, INSERT_ACL | SELECT_ACL); }
   bool fix_length_and_dec(THD *thd) override
   {
     unsigned_flag= 0;
@@ -4284,6 +4287,8 @@
 public:
   Item_func_lastval(THD *thd, TABLE_LIST *table_list_arg):
     Item_func_nextval(thd, table_list_arg) {}
+  bool fix_fields(THD *thd, Item **ref) override
+  { return check_access_and_fix_fields(thd, ref, SELECT_ACL); }
   longlong val_int() override;
   LEX_CSTRING func_name_cstring() const override
  {
@@ -4308,6 +4313,8 @@
     : Item_func_nextval(thd, table_list_arg),
       nextval(nextval_arg), round(round_arg), is_used(is_used_arg)
   {}
+  bool fix_fields(THD *thd, Item **ref) override
+  { return check_access_and_fix_fields(thd, ref, INSERT_ACL); }
   longlong val_int() override;
   LEX_CSTRING func_name_cstring() const override
   {
diff -Nru mariadb-10.11.11/sql/item_geofunc.cc mariadb-10.11.13/sql/item_geofunc.cc
--- mariadb-10.11.11/sql/item_geofunc.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item_geofunc.cc 2025-05-19 16:14:25.000000000 +0000
@@ -91,6 +91,15 @@
 {
   String *str_ret= args[0]->val_str(str);
   null_value= args[0]->null_value;
+  if (!null_value && arg_count == 2 && !args[1]->null_value) {
+    srid= (uint32)args[1]->val_int();
+
+    if (str->copy(*str_ret))
+      return 0;
+
+    int4store(str->ptr(), srid);
+    return str;
+  }
   return str_ret;
 }
 
@@ -2524,7 +2533,7 @@
   String *arg2= args[1]->val_str(&bak2);
   double distance= 0.0;
   double sphere_radius= 6370986.0; // Default radius equals Earth radius
-  
+
   null_value= (args[0]->null_value || args[1]->null_value);
   if (null_value)
   {
@@ -2542,7 +2551,7 @@
     }
     if (sphere_radius <= 0)
     {
-      my_error(ER_INTERNAL_ERROR, MYF(0), "Radius must be greater than zero.");
+      my_error(ER_GIS_UNSUPPORTED_ARGUMENT, MYF(0), func_name());
       return 1;
     }
   }
@@ -2554,26 +2563,27 @@
     my_error(ER_GIS_INVALID_DATA, MYF(0), "ST_Distance_Sphere");
     goto handle_errors;
   }
-// Method allowed for points and multipoints
+  // Method allowed for points and multipoints
   if (!(g1->get_class_info()->m_type_id == Geometry::wkb_point ||
         g1->get_class_info()->m_type_id == Geometry::wkb_multipoint) ||
       !(g2->get_class_info()->m_type_id == Geometry::wkb_point ||
        g2->get_class_info()->m_type_id == Geometry::wkb_multipoint))
   {
-    // Generate error message in case different geometry is used?
-    my_error(ER_INTERNAL_ERROR, MYF(0), func_name());
+    // Generate error message in case of unexpected geometry.
+    my_error(ER_GIS_UNSUPPORTED_ARGUMENT, MYF(0), func_name());
     return 0;
   }
   distance= spherical_distance_points(g1, g2, sphere_radius);
   if (distance < 0)
   {
-    my_error(ER_INTERNAL_ERROR, MYF(0), "Returned distance cannot be negative.");
+    my_error(ER_INTERNAL_ERROR, MYF(0),
+             "Returned distance cannot be negative.");
     return 1;
   }
   return distance;
 
- handle_errors:
-  return 0;
+handle_errors:
+  return 0;
 }
diff -Nru mariadb-10.11.11/sql/item_jsonfunc.cc mariadb-10.11.13/sql/item_jsonfunc.cc
--- mariadb-10.11.11/sql/item_jsonfunc.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item_jsonfunc.cc 2025-05-19 16:14:25.000000000 +0000
@@ -74,7 +74,8 @@
 }
 
 
-static bool append_simple(String *s, const char *a, size_t a_len)
+static bool __attribute__((warn_unused_result))
+append_simple(String *s, const char *a, size_t a_len)
 {
   if (!s->realloc_with_extra_if_needed(s->length() + a_len))
   {
@@ -86,7 +87,8 @@
 }
 
 
-static inline bool append_simple(String *s, const uchar *a, size_t a_len)
+static inline bool __attribute__((warn_unused_result))
+append_simple(String *s, const uchar *a, size_t a_len)
 {
   return append_simple(s, (const char *) a, a_len);
 }
@@ -300,8 +302,10 @@
 
   nice_js->length(0);
   nice_js->set_charset(je->s.cs);
-  nice_js->alloc(je->s.str_end - je->s.c_str + 32);
+  if (nice_js->alloc(je->s.str_end - je->s.c_str + 32))
+    goto error;
+
 
   DBUG_ASSERT(mode != Item_func_json_format::DETAILED ||
               (tab_size >= 0 && tab_size <= TAB_SIZE_LIMIT));
@@ -347,7 +351,8 @@
           goto error;
 
         nice_js->append('"');
-        append_simple(nice_js, key_start, key_end - key_start);
+        if (append_simple(nice_js, key_start, key_end - key_start))
+          goto error;
         nice_js->append(colon, colon_len);
       }
       /* now we have key value to handle, so no 'break'. */
@@ -851,7 +856,7 @@
 
 bool Item_func_json_unquote::fix_length_and_dec(THD *thd)
 {
-  collation.set(&my_charset_utf8mb3_general_ci,
+  collation.set(&my_charset_utf8mb4_bin,
                 DERIVATION_COERCIBLE, MY_REPERTOIRE_ASCII);
   max_length= args[0]->max_char_length() * collation.collation->mbmaxlen;
   set_maybe_null();
@@ -894,12 +899,12 @@
     return js;
 
   str->length(0);
-  str->set_charset(&my_charset_utf8mb3_general_ci);
+  str->set_charset(&my_charset_utf8mb4_bin);
 
   if (str->realloc_with_extra_if_needed(je.value_len) ||
       (c_len= json_unescape(js->charset(), je.value,
                             je.value + je.value_len,
-                            &my_charset_utf8mb3_general_ci,
+                            &my_charset_utf8mb4_bin,
                             (uchar *) str->ptr(),
                             (uchar *) (str->ptr() + je.value_len))) < 0)
     goto error;
@@ -2248,24 +2253,67 @@
   str->set_charset(js->charset());
   if (item_pos)
   {
-    if (append_simple(str, js->ptr(), item_pos - js->ptr()) ||
-        (n_item > 0 && str->append(" ", 1)) ||
-        append_json_value(str, args[n_arg+1], &tmp_val) ||
-        str->append(",", 1) ||
-        (n_item == 0 && str->append(" ", 1)) ||
-        append_simple(str, item_pos, js->end() - item_pos))
+    my_ptrdiff_t size= item_pos - js->ptr();
+    if (append_simple(str, js->ptr(), size))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), (int) size);
       goto return_null; /* Out of memory. */
+    }
+    if (n_item > 0 && str->append(" ", 1))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), 1);
+      goto return_null; /* Out of memory. */
+    }
+    if (append_json_value(str, args[n_arg+1], &tmp_val))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), tmp_val.length());
+      goto return_null; /* Out of memory. */
+    }
+    if (str->append(",", 1))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), 1);
+      goto return_null; /* Out of memory. */
+    }
+    if (n_item == 0 && str->append(" ", 1))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), 1);
+      goto return_null; /* Out of memory. */
+    }
+    size= js->end() - item_pos;
+    if (append_simple(str, item_pos, size))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), (int) size);
+      goto return_null; /* Out of memory. */
+    }
   }
   else
   {
+    my_ptrdiff_t size;
     /* Insert position wasn't found - append to the array. */
     DBUG_ASSERT(je.state == JST_ARRAY_END);
     item_pos= (const char *) (je.s.c_str - je.sav_c_len);
-    if (append_simple(str, js->ptr(), item_pos - js->ptr()) ||
-        (n_item > 0 && str->append(", ", 2)) ||
-        append_json_value(str, args[n_arg+1], &tmp_val) ||
-        append_simple(str, item_pos, js->end() - item_pos))
+    size= item_pos - js->ptr();
+    if (append_simple(str, js->ptr(), size))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), (int) size);
+      goto return_null; /* Out of memory. */
+    }
+    if (n_item > 0 && str->append(", ", 2))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), 2);
       goto return_null; /* Out of memory. */
+    }
+    if (append_json_value(str, args[n_arg+1], &tmp_val))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), tmp_val.length());
+      goto return_null; /* Out of memory. */
+    }
+    size= js->end() - item_pos;
+    if (append_simple(str, item_pos, size))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), (int) size);
+      goto return_null; /* Out of memory. */
+    }
   }
 
   {
@@ -4117,13 +4165,23 @@
       goto error;
 
     if (je.value_type == JSON_VALUE_STRING)
     {
-      if (value2.realloc_with_extra_if_needed(je.value_len) ||
-          (c_len= json_unescape(js->charset(), je.value,
+      if (value2.realloc_with_extra_if_needed(je.value_len))
+      {
+        my_error(ER_OUTOFMEMORY, MYF(0), je.value_len);
+        goto error;
+      }
+      if ((c_len= json_unescape(js->charset(), je.value,
                                 je.value + je.value_len,
-                                &my_charset_utf8mb3_general_ci,
+                                &my_charset_utf8mb4_bin,
                                 (uchar *) value2.ptr(),
                                 (uchar *) (value2.ptr() + je.value_len))) < 0)
+      {
+        if (current_thd)
+          push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
+                              ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
+                              0, "comparison", (int)((const char *) je.s.c_str - js->ptr()));
         goto error;
+      }
 
       value2.length(c_len);
       js= &value2;
@@ -4166,13 +4224,23 @@
 
   if (type == JSON_VALUE_STRING)
   {
-    if (value1.realloc_with_extra_if_needed(value_len) ||
-        (c_len= json_unescape(value1.charset(), (uchar *) value,
+    if (value1.realloc_with_extra_if_needed(value_len))
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), value_len);
+      return 1;
+    }
+    if ((c_len= json_unescape(value1.charset(), (uchar *) value,
                               (uchar *) value+value_len,
-                              &my_charset_utf8mb3_general_ci,
+                              &my_charset_utf8mb4_bin,
                               (uchar *) value1.ptr(),
                              (uchar *) (value1.ptr() + value_len))) < 0)
+    {
+      if (current_thd)
+        push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
+                            ER_JSON_BAD_CHR, ER_THD(current_thd, ER_JSON_BAD_CHR),
+                            0, "equality comparison", 0);
       return 1;
+    }
 
     value1.length(c_len);
     res1= &value1;
   }
diff -Nru mariadb-10.11.11/sql/item_strfunc.cc mariadb-10.11.13/sql/item_strfunc.cc
--- mariadb-10.11.11/sql/item_strfunc.cc 2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/sql/item_strfunc.cc 2025-05-19 16:14:25.000000000 +0000
@@ -56,7 +56,6 @@
 #include "sql_statistics.h"
 
 /*
fmtlib include (https://fmt.dev/). */ -#define FMT_STATIC_THOUSANDS_SEPARATOR ',' #define FMT_HEADER_ONLY 1 #include "fmt/args.h" @@ -1403,6 +1402,13 @@ }; }; +struct fmt_locale_comma : std::numpunct<char> +{ + char do_thousands_sep() const override { return ','; } + std::string do_grouping() const override { return "\3"; } +}; +static std::locale fmt_locale(std::locale(), new fmt_locale_comma); + /* SFORMAT(format_string, ...) This function receives a formatting specification string and N parameters @@ -1455,7 +1461,7 @@ /* Create the string output */ try { - auto text = fmt::vformat(fmt_arg->c_ptr_safe(), arg_store); + auto text = fmt::vformat(fmt_locale, fmt_arg->c_ptr_safe(), arg_store); res->length(0); res->set_charset(collation.collation); res->append(text.c_str(), text.size(), fmt_arg->charset()); diff -Nru mariadb-10.11.11/sql/item_subselect.cc mariadb-10.11.13/sql/item_subselect.cc --- mariadb-10.11.11/sql/item_subselect.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/item_subselect.cc 2025-05-19 16:14:25.000000000 +0000 @@ -7147,3 +7147,27 @@ for (uint i= 0; i < merge_keys_count; i++) partial_match_array_sizes[i]= merge_keys[i]->get_key_buff_elements(); } + + +/* + Check if somewhere inside this subselect we read the table. This means a + full read "(SELECT ... FROM tbl)"; an outside reference to tbl.column does + not count +*/ + +bool +Item_subselect::subselect_table_finder_processor(void *arg) +{ + subselect_table_finder_param *param= (subselect_table_finder_param *)arg; + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + { + TABLE_LIST *dup; + if ((dup= sl->find_table(param->thd, &param->find->db, + &param->find->table_name))) + { + param->dup= dup; + return TRUE; + } + } + return FALSE; +}; diff -Nru mariadb-10.11.11/sql/item_subselect.h mariadb-10.11.13/sql/item_subselect.h --- mariadb-10.11.11/sql/item_subselect.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/item_subselect.h 2025-05-19 16:14:25.000000000 +0000 @@ -273,6 +273,7 @@ { return TRUE; } + bool subselect_table_finder_processor(void *arg) override; void register_as_with_rec_ref(With_element *with_elem); void init_expr_cache_tracker(THD *thd); diff -Nru mariadb-10.11.11/sql/lex_string.h mariadb-10.11.13/sql/lex_string.h --- mariadb-10.11.11/sql/lex_string.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/lex_string.h 2025-05-19 16:14:25.000000000 +0000 @@ -110,7 +110,7 @@ class Lex_cstring_strlen: public Lex_cstring { public: - Lex_cstring_strlen(const char *from) + explicit Lex_cstring_strlen(const char *from) :Lex_cstring(from, from ? strlen(from) : 0) { } }; diff -Nru mariadb-10.11.11/sql/log.cc mariadb-10.11.13/sql/log.cc --- mariadb-10.11.11/sql/log.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/log.cc 2025-05-19 16:14:25.000000000 +0000 @@ -322,6 +322,11 @@ incident= TRUE; } + void clear_incident(void) + { + incident= FALSE; + } + bool has_incident(void) { return(incident); @@ -1932,6 +1937,16 @@ if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE)) DBUG_RETURN(1); +#ifdef WITH_WSREP + /* Wsrep transaction was BF aborted but it must replay because certification + succeeded. The transaction must not be written into binlog yet, it will + be done during commit after the replay.
*/ + if (WSREP(thd) && wsrep_must_replay(thd)) + { + DBUG_RETURN(0); + } +#endif /* WITH_WSREP */ + /* Doing a commit or a rollback including non-transactional tables, i.e., ending a transaction where we might write the transaction @@ -2530,6 +2545,18 @@ } +void binlog_clear_incident(THD *thd) +{ + binlog_cache_mngr *const cache_mngr= opt_bin_log ? + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0; + if (cache_mngr) + { + cache_mngr->stmt_cache.clear_incident(); + cache_mngr->trx_cache.clear_incident(); + } +} + + void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional) { DBUG_ENTER("MYSQL_BIN_LOG::set_write_error"); @@ -7971,7 +7998,12 @@ { DBUG_RETURN(0); } - else if (!(thd->variables.option_bits & OPTION_BIN_LOG)) + + if (!(thd->variables.option_bits & OPTION_BIN_LOG) +#ifdef WITH_WSREP + && !WSREP(thd) +#endif + ) { cache_mngr->need_unlog= false; DBUG_RETURN(0); @@ -8878,6 +8910,13 @@ bool has_xid= entry->end_event->get_type_code() == XID_EVENT; DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt"); +#ifdef WITH_WSREP + if (WSREP(entry->thd) && + !(entry->thd->variables.option_bits & OPTION_BIN_LOG)) + { + DBUG_RETURN(0); + } +#endif /* WITH_WSREP */ /* An error in the trx_cache will truncate the cache to the last good diff -Nru mariadb-10.11.11/sql/log.h mariadb-10.11.13/sql/log.h --- mariadb-10.11.11/sql/log.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/log.h 2025-05-19 16:14:25.000000000 +0000 @@ -1186,6 +1186,7 @@ void make_default_log_name(char **out, const char* log_ext, bool once); void binlog_reset_cache(THD *thd); +void binlog_clear_incident(THD *thd); bool write_annotated_row(THD *thd); extern MYSQL_PLUGIN_IMPORT MYSQL_BIN_LOG mysql_bin_log; diff -Nru mariadb-10.11.11/sql/mysql_install_db.cc mariadb-10.11.13/sql/mysql_install_db.cc --- mariadb-10.11.11/sql/mysql_install_db.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/mysql_install_db.cc 2025-05-19 16:14:25.000000000 +0000 @@ -336,7 +336,7 @@ " --bootstrap" " --datadir=." " --tmpdir=." - " --loose-innodb-buffer-pool-size=20M" + " --loose-innodb-buffer-pool-size=21M" "\"" , mysqld_path, opt_verbose_bootstrap ? "--console" : ""); return cmdline; @@ -344,10 +344,29 @@ static char my_ini_path[MAX_PATH]; +/** + Wrapper for WritePrivateProfileStringA, with retries and sleeps + if file is locked by another process. 
+*/ +static BOOL write_private_profile_string_with_retries(const char *appname, + const char *key, const char *val, const char *filename) +{ + static constexpr int RETRIES=50; + static constexpr int SLEEP_MS=10; + for (int n= RETRIES;; n--) + { + if (WritePrivateProfileStringA(appname, key, val, filename)) + return TRUE; + if (GetLastError() != ERROR_ACCESS_DENIED || !n) + return FALSE; + Sleep(SLEEP_MS); + } +} + static void write_myini_str(const char *key, const char* val, const char *section="mysqld") { DBUG_ASSERT(my_ini_path[0]); - if (!WritePrivateProfileString(section, key, val, my_ini_path)) + if (!write_private_profile_string_with_retries(section, key, val, my_ini_path)) { die("Can't write to ini file key=%s, val=%s, section=%s, Windows error %u",key,val,section, GetLastError()); diff -Nru mariadb-10.11.11/sql/mysql_upgrade_service.cc mariadb-10.11.13/sql/mysql_upgrade_service.cc --- mariadb-10.11.11/sql/mysql_upgrade_service.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/mysql_upgrade_service.cc 2025-05-19 16:14:25.000000000 +0000 @@ -45,7 +45,6 @@ "OPTIONS:" static char mysqld_path[MAX_PATH]; -static char mysqladmin_path[MAX_PATH]; static char mysqlupgrade_path[MAX_PATH]; static char defaults_file_param[MAX_PATH + 16]; /*--defaults-file= */ @@ -302,13 +301,29 @@ our --skip-grant-tables do not work anymore after mysql_upgrade that does "flush privileges". Instead, the shutdown event is set. */ +#define OPEN_EVENT_RETRY_SLEEP_MS 100 +#define OPEN_EVENT_MAX_RETRIES 50 + void initiate_mysqld_shutdown() { char event_name[32]; DWORD pid= GetProcessId(mysqld_process); sprintf_s(event_name, "MySQLShutdown%d", pid); - HANDLE shutdown_handle= OpenEvent(EVENT_MODIFY_STATE, FALSE, event_name); - if(!shutdown_handle) + + HANDLE shutdown_handle; + for (int i= 0;; i++) + { + shutdown_handle= OpenEvent(EVENT_MODIFY_STATE, FALSE, event_name); + if(shutdown_handle != nullptr || i == OPEN_EVENT_MAX_RETRIES) + break; + if (WaitForSingleObject(mysqld_process, OPEN_EVENT_RETRY_SLEEP_MS) != + WAIT_TIMEOUT) + { + die("server process exited before shutdown event was created"); + break; + } + } + if (!shutdown_handle) { die("OpenEvent() failed for shutdown event"); } @@ -403,6 +418,26 @@ } +/** + Waits until the starting server can be connected to via the given named pipe, with a timeout. + Dies if either the server process exits meanwhile, or when the timeout is exceeded. +*/ +static void wait_for_server_startup(HANDLE process, const char *named_pipe, DWORD timeout_sec) +{ + unsigned long long end_time= GetTickCount64() + 1000ULL*timeout_sec; + for (;;) + { + if (WaitNamedPipe(named_pipe, 0)) + return; + + if (GetTickCount64() >= end_time) + die("Server did not start up after %lu seconds", timeout_sec); + + if (WaitForSingleObject(process, 100) != WAIT_TIMEOUT) + die("Server did not start"); + } +} + int main(int argc, char **argv) { @@ -419,8 +454,9 @@ /* Get full path to mysqld, we need it when changing service configuration. - Assume installation layout, i.e mysqld.exe, mysqladmin.exe, mysqlupgrade.exe - and mysql_upgrade_service.exe are in the same directory. + Assume mysqld.exe is in the same directory as this program.
+ mysql_upgrade.exe is either in the same directory, or pointed to by + MARIADB_UPGRADE_EXE environment variable (in case of MTR running it) */ GetModuleFileName(NULL, bindir, FN_REFLEN); p= strrchr(bindir, FN_LIBCHAR); @@ -429,15 +465,19 @@ *p= 0; } sprintf_s(mysqld_path, "%s\\mysqld.exe", bindir); - sprintf_s(mysqladmin_path, "%s\\mysqladmin.exe", bindir); sprintf_s(mysqlupgrade_path, "%s\\mysql_upgrade.exe", bindir); - char *paths[]= {mysqld_path, mysqladmin_path, mysqlupgrade_path}; - for(int i= 0; i< 3;i++) - { - if(GetFileAttributes(paths[i]) == INVALID_FILE_ATTRIBUTES) - die("File %s does not exist", paths[i]); + if (access(mysqld_path, 0)) + die("File %s does not exist", mysqld_path); + if (access(mysqlupgrade_path, 0)) + { + /* Try to get path from environment variable, set by MTR */ + char *alt_mysqlupgrade_path= getenv("MARIADB_UPGRADE_EXE"); + if (alt_mysqlupgrade_path) + sprintf_s(mysqlupgrade_path, "%s", alt_mysqlupgrade_path); } + if (access(mysqlupgrade_path, 0)) + die("File %s does not exist", mysqlupgrade_path); /* Messages written on stdout should not be buffered, GUI upgrade program @@ -482,6 +522,10 @@ DWORD start_duration_ms = 0; + char pipe_name[64]; + snprintf(pipe_name, sizeof(pipe_name), + "\\\\.\\pipe\\mysql_upgrade_service_%lu", GetCurrentProcessId()); + if (do_start_stop_server) { /* Start/stop server with --loose-innodb-fast-shutdown=1 */ @@ -493,37 +537,23 @@ { die("Cannot start mysqld.exe process, last error =%u", GetLastError()); } - char pipe_name[64]; - snprintf(pipe_name, sizeof(pipe_name), "\\\\.\\pipe\\mysql_upgrade_service_%lu", - GetCurrentProcessId()); - for (;;) - { - if (WaitForSingleObject(mysqld_process, 0) != WAIT_TIMEOUT) - die("mysqld.exe did not start"); - - if (WaitNamedPipe(pipe_name, 0)) - { - // Server started, shut it down. - initiate_mysqld_shutdown(); - if (WaitForSingleObject((HANDLE)mysqld_process, shutdown_timeout * 1000) != WAIT_OBJECT_0) - { - die("Could not shutdown server started with '--innodb-fast-shutdown=0'"); - } - DWORD exit_code; - if (!GetExitCodeProcess((HANDLE)mysqld_process, &exit_code)) - { - die("Could not get mysqld's exit code"); - } - if (exit_code) - { - die("Could not get successfully shutdown mysqld"); - } - CloseHandle(mysqld_process); - break; - } - Sleep(500); - start_duration_ms += 500; + wait_for_server_startup(mysqld_process, pipe_name, startup_timeout); + // Server started, shut it down.
+ initiate_mysqld_shutdown(); + if (WaitForSingleObject((HANDLE)mysqld_process, shutdown_timeout * 1000) != WAIT_OBJECT_0) + { + die("Could not shutdown server"); + } + DWORD exit_code; + if (!GetExitCodeProcess((HANDLE)mysqld_process, &exit_code)) + { + die("Could not get server's exit code"); + } + if (exit_code) + { + die("Could not successfully shut down server (exit code %u)", exit_code); } + CloseHandle(mysqld_process); } log("Phase %d/%d: Fixing server config file%s", ++phase, max_phases, @@ -550,22 +580,7 @@ } log("Phase %d/%d: Waiting for startup to complete",++phase,max_phases); - start_duration_ms= 0; - for(;;) - { - if (WaitForSingleObject(mysqld_process, 0) != WAIT_TIMEOUT) - die("mysqld.exe did not start"); - - if (run_tool(P_WAIT, mysqladmin_path, "--protocol=pipe", socket_param, - "ping", "--no-beep", NULL) == 0) - { - break; - } - if (start_duration_ms > startup_timeout*1000) - die("Server did not come up in %d seconds",startup_timeout); - Sleep(500); - start_duration_ms+= 500; - } + wait_for_server_startup(mysqld_process, pipe_name, startup_timeout); log("Phase %d/%d: Running mysql_upgrade",++phase,max_phases); int upgrade_err= (int) run_tool(P_WAIT, mysqlupgrade_path, diff -Nru mariadb-10.11.11/sql/mysqld.cc mariadb-10.11.13/sql/mysqld.cc --- mariadb-10.11.11/sql/mysqld.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/mysqld.cc 2025-05-19 16:14:25.000000000 +0000 @@ -420,7 +420,9 @@ char* opt_secure_file_priv; my_bool lower_case_file_system= 0; my_bool opt_large_pages= 0; +#ifdef HAVE_SOLARIS_LARGE_PAGES my_bool opt_super_large_pages= 0; +#endif my_bool opt_myisam_use_mmap= 0; uint opt_large_page_size= 0; #if defined(ENABLED_DEBUG_SYNC) @@ -1396,11 +1398,6 @@ static int systemd_sock_activation; /* systemd socket activation */ - -/** wakeup listening(main) thread by writing to this descriptor */ -static int termination_event_fd= -1; - - C_MODE_START #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE /** @@ -1453,9 +1450,14 @@ #endif /* OS specific variables */ - +#ifndef EMBEDDED_LIBRARY #ifdef _WIN32 +/** wakeup main thread by signaling this event */ HANDLE hEventShutdown; +#else +/** wakeup listening(main) thread by writing to this descriptor */ +static int termination_event_fd= -1; +#endif #endif @@ -3744,12 +3746,12 @@ #endif /* - When thread specific is set, both mysqld_server_initialized and thd - must be set, and we check that with DBUG_ASSERT. - - However, do not crash, if current_thd is NULL, in release version. + is_thread_specific is only relevant when a THD exists and the server + has fully started. is_thread_specific can be set during recovery by + Aria for functions that are normally only run in one thread. + However InnoDB sets thd early, so we can use it.
*/ - DBUG_ASSERT(!is_thread_specific || (mysqld_server_initialized && thd)); + DBUG_ASSERT(!is_thread_specific || thd || !plugins_are_initialized); if (is_thread_specific && likely(thd)) /* If thread specific memory */ { @@ -4118,7 +4120,7 @@ if (opt_large_pages) { DBUG_PRINT("info", ("Large page set")); - if (my_init_large_pages(opt_super_large_pages)) + if (my_init_large_pages()) { return 1; } @@ -5337,7 +5339,7 @@ MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"), MARIADB_REMOVED_OPTION("innodb-lru-flush-size"), MARIADB_REMOVED_OPTION("innodb-page-cleaners"), - MARIADB_REMOVED_OPTION("innodb-purge-truncate-frequency"), + MARIADB_REMOVED_OPTION("innodb-purge-rseg-truncate-frequency"), MARIADB_REMOVED_OPTION("innodb-replication-delay"), MARIADB_REMOVED_OPTION("innodb-scrub-log"), MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"), @@ -7872,7 +7874,9 @@ bzero((char*) &global_status_var, offsetof(STATUS_VAR, last_cleared_system_status_var)); opt_large_pages= 0; +#ifdef HAVE_SOLARIS_LARGE_PAGES opt_super_large_pages= 0; +#endif #if defined(ENABLED_DEBUG_SYNC) opt_debug_sync_timeout= 0; #endif /* defined(ENABLED_DEBUG_SYNC) */ @@ -8872,15 +8876,22 @@ bool is_log= opt_log || global_system_variables.sql_log_slow || opt_bin_log; bool is_debug= IF_DBUG(!strstr(MYSQL_SERVER_SUFFIX_STR, "-debug"), 0); const char *is_valgrind= -#ifdef HAVE_VALGRIND +#ifdef HAVE_valgrind !strstr(MYSQL_SERVER_SUFFIX_STR, "-valgrind") ? "-valgrind" : #endif ""; + const char *is_asan= +#ifdef __SANITIZE_ADDRESS__ + !strstr(MYSQL_SERVER_SUFFIX_STR, "-asan") ? "-asan" : +#endif + ""; + return strxnmov(buf, size - 1, MYSQL_SERVER_VERSION, MYSQL_SERVER_SUFFIX_STR, IF_EMBEDDED("-embedded", ""), is_valgrind, + is_asan, is_debug ? "-debug" : "", is_log ? "-log" : "", NullS); @@ -9303,6 +9314,7 @@ PSI_stage_info stage_purging_old_relay_logs= { 0, "Purging old relay logs", 0}; PSI_stage_info stage_query_end= { 0, "Query end", 0}; PSI_stage_info stage_starting_cleanup= { 0, "Starting cleanup", 0}; +PSI_stage_info stage_slave_sql_cleanup= { 0, "Slave SQL thread ending", 0}; PSI_stage_info stage_rollback= { 0, "Rollback", 0}; PSI_stage_info stage_rollback_implicit= { 0, "Rollback_implicit", 0}; PSI_stage_info stage_commit= { 0, "Commit", 0}; @@ -9544,6 +9556,7 @@ & stage_preparing, & stage_purging_old_relay_logs, & stage_starting_cleanup, + & stage_slave_sql_cleanup, & stage_query_end, & stage_queueing_master_event_to_the_relay_log, & stage_reading_event_from_the_relay_log, diff -Nru mariadb-10.11.11/sql/mysqld.h mariadb-10.11.13/sql/mysqld.h --- mariadb-10.11.11/sql/mysqld.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/mysqld.h 2025-05-19 16:14:25.000000000 +0000 @@ -612,6 +612,7 @@ extern PSI_stage_info stage_purging_old_relay_logs; extern PSI_stage_info stage_query_end; extern PSI_stage_info stage_starting_cleanup; +extern PSI_stage_info stage_slave_sql_cleanup; extern PSI_stage_info stage_rollback; extern PSI_stage_info stage_rollback_implicit; extern PSI_stage_info stage_commit; diff -Nru mariadb-10.11.11/sql/net_serv.cc mariadb-10.11.13/sql/net_serv.cc --- mariadb-10.11.11/sql/net_serv.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/net_serv.cc 2025-05-19 16:14:25.000000000 +0000 @@ -773,18 +773,22 @@ } #endif /* !defined(MYSQL_SERVER) */ net->error= 2; /* Close socket */ - net->last_errno= (interrupted ? 
ER_NET_WRITE_INTERRUPTED : - ER_NET_ERROR_ON_WRITE); -#ifdef MYSQL_SERVER - if (global_system_variables.log_warnings > 3) + + if (net->vio->state != VIO_STATE_SHUTDOWN || net->last_errno == 0) { - sql_print_warning("Could not write packet: fd: %lld state: %d " - "errno: %d vio_errno: %d length: %ld", - (longlong) vio_fd(net->vio), (int) net->vio->state, - vio_errno(net->vio), net->last_errno, - (ulong) (end-pos)); - } + net->last_errno= (interrupted ? ER_NET_WRITE_INTERRUPTED : + ER_NET_ERROR_ON_WRITE); +#ifdef MYSQL_SERVER + if (global_system_variables.log_warnings > 3) + { + sql_print_warning("Could not write packet: fd: %lld state: %d " + "errno: %d vio_errno: %d length: %ld", + (longlong) vio_fd(net->vio), (int) net->vio->state, + vio_errno(net->vio), net->last_errno, + (ulong) (end-pos)); + } #endif + } MYSQL_SERVER_my_error(net->last_errno, MYF(0)); break; } @@ -1097,6 +1101,7 @@ ER_NET_READ_INTERRUPTED : ER_NET_READ_ERROR); #ifdef MYSQL_SERVER + strmake_buf(net->last_error, ER(net->last_errno)); if (global_system_variables.log_warnings > 3) { /* Log things as a warning */ diff -Nru mariadb-10.11.11/sql/opt_range.cc mariadb-10.11.13/sql/opt_range.cc --- mariadb-10.11.11/sql/opt_range.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/opt_range.cc 2025-05-19 16:14:25.000000000 +0000 @@ -8351,56 +8351,58 @@ /* Build conjunction of all SEL_TREEs for a simple predicate applying equalities - + SYNOPSIS get_full_func_mm_tree() param PARAM from SQL_SELECT::test_quick_select field_item field in the predicate - value constant in the predicate (or a field already read from + value constant in the predicate (or a field already read from a table in the case of dynamic range access) (for BETWEEN it contains the number of the field argument, - for IN it's always 0) + for IN it's always 0) inv TRUE <> NOT cond_func is considered (makes sense only when cond_func is BETWEEN or IN) DESCRIPTION - For a simple SARGable predicate of the form (f op c), where f is a field and - c is a constant, the function builds a conjunction of all SEL_TREES that can - be obtained by the substitution of f for all different fields equal to f. + For a simple SARGable predicate of the form (f op c), where f is a field + and c is a constant, the function builds a conjunction of all SEL_TREES that + can be obtained by the substitution of f for all different fields equal to f. - NOTES + NOTES If the WHERE condition contains a predicate (fi op c), then not only SEL_TREE for this predicate is built, but the trees for the results of substitution of fi for each fj belonging to the same multiple equality as fi are built as well. - E.g. for WHERE t1.a=t2.a AND t2.a > 10 + E.g. for WHERE t1.a=t2.a AND t2.a > 10 a SEL_TREE for t2.a > 10 will be built for quick select from t2 - and + and a SEL_TREE for t1.a > 10 will be built for quick select from t1. - A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2) is treated - in a similar way: we build a conjuction of trees for the results - of all substitutions of fi for equal fj. + A BETWEEN predicate of the form (fi [NOT] BETWEEN c1 AND c2), where fi + is some field, is treated in a similar way: we build a conjunction of + trees for the results of all substitutions of fi by each fj equal to it. + Yet a predicate of the form (c BETWEEN f1i AND f2i) is processed
It is considered as a conjuction of two SARGable - predicates (f1i <= c) and (f2i <=c) and the function get_full_func_mm_tree - is called for each of them separately producing trees for - AND j (f1j <=c ) and AND j (f2j <= c) + predicates (f1i <= c) and (c <= f2i) and the function get_full_func_mm_tree + is called for each of them separately producing trees for + AND j (f1j <= c) and AND j (c <= f2j) After this these two trees are united in one conjunctive tree. It's easy to see that the same tree is obtained for - AND j,k (f1j <=c AND f2k<=c) - which is equivalent to + AND j,k (f1j <= c AND c <= f2k) + which is equivalent to AND j,k (c BETWEEN f1j AND f2k). + The validity of the processing of the predicate (c NOT BETWEEN f1i AND f2i) which equivalent to (f1i > c OR f2i < c) is not so obvious. Here the - function get_full_func_mm_tree is called for (f1i > c) and (f2i < c) - producing trees for AND j (f1j > c) and AND j (f2j < c). Then this two - trees are united in one OR-tree. The expression + function get_full_func_mm_tree is called for (f1i > c) and called for + (f2i < c) producing trees for AND j (f1j > c) and AND j (f2j < c). Then + this two trees are united in one OR-tree. The expression (AND j (f1j > c) OR AND j (f2j < c) is equivalent to the expression - AND j,k (f1j > c OR f2k < c) - which is just a translation of + AND j,k (f1j > c OR f2k < c) + which is just a translation of AND j,k (c NOT BETWEEN f1j AND f2k) In the cases when one of the items f1, f2 is a constant c1 we do not create @@ -8413,9 +8415,9 @@ As to IN predicates only ones of the form (f IN (c1,...,cn)), where f1 is a field and c1,...,cn are constant, are considered as SARGable. We never try to narrow the index scan using predicates of - the form (c IN (c1,...,f,...,cn)). - - RETURN + the form (c IN (c1,...,f,...,cn)). + + RETURN Pointer to the tree representing the built conjunction of SEL_TREEs */ @@ -8513,6 +8515,11 @@ SEL_TREE *tree= li.ref()[0]->get_mm_tree(param, li.ref()); if (param->statement_should_be_aborted()) DBUG_RETURN(NULL); + bool orig_disable_index_merge= param->disable_index_merge_plans; + + if (list.elements > MAX_OR_ELEMENTS_FOR_INDEX_MERGE) + param->disable_index_merge_plans= true; + if (tree) { if (tree->type == SEL_TREE::IMPOSSIBLE && @@ -8529,7 +8536,10 @@ { SEL_TREE *new_tree= li.ref()[0]->get_mm_tree(param, li.ref()); if (new_tree == NULL || param->statement_should_be_aborted()) + { + param->disable_index_merge_plans= orig_disable_index_merge; DBUG_RETURN(NULL); + } tree= tree_or(param, tree, new_tree); if (tree == NULL || tree->type == SEL_TREE::ALWAYS) { @@ -8561,6 +8571,7 @@ if (replace_cond) *cond_ptr= replacement_item; } + param->disable_index_merge_plans= orig_disable_index_merge; DBUG_RETURN(tree); } @@ -8614,6 +8625,19 @@ } +bool +Item_func_between::can_optimize_range_const(Item_field *field_item) const +{ + const Type_handler *fi_handler= field_item->type_handler_for_comparison(); + Type_handler_hybrid_field_type cmp(fi_handler); + if (cmp.aggregate_for_comparison(args[0]->type_handler_for_comparison()) || + cmp.type_handler() != m_comparator.type_handler()) + return false; // Cannot optimize range because of type mismatch. 
+ + return true; +} + + SEL_TREE * Item_func_between::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) { @@ -8639,6 +8663,8 @@ if (arguments()[i]->real_item()->type() == Item::FIELD_ITEM) { Item_field *field_item= (Item_field*) (arguments()[i]->real_item()); + if (!can_optimize_range_const(field_item)) + continue; SEL_TREE *tmp= get_full_func_mm_tree(param, field_item, (Item*)(intptr) i); if (negated) @@ -9952,6 +9978,8 @@ { bool must_be_ored= sel_trees_must_be_ored(param, tree1, tree2, ored_keys); no_imerge_from_ranges= must_be_ored; + if (param->disable_index_merge_plans) + no_imerge_from_ranges= true; if (no_imerge_from_ranges && no_merges1 && no_merges2) { @@ -16006,7 +16034,7 @@ Remember this key, and continue looking for a non-NULL key that satisfies some other condition. */ - memcpy(tmp_record, record, head->s->rec_buff_length); + memcpy(tmp_record, record, head->s->reclength); found_null= TRUE; continue; } @@ -16046,7 +16074,7 @@ */ if (found_null && result) { - memcpy(record, tmp_record, head->s->rec_buff_length); + memcpy(record, tmp_record, head->s->reclength); result= 0; } return result; @@ -16079,7 +16107,7 @@ ha_rkey_function find_flag; key_part_map keypart_map; QUICK_RANGE *cur_range; - int result; + int result= HA_ERR_KEY_NOT_FOUND; DBUG_ASSERT(min_max_ranges.elements > 0); @@ -16088,10 +16116,11 @@ get_dynamic(&min_max_ranges, (uchar*)&cur_range, range_idx - 1); /* - If the current value for the min/max argument is smaller than the left - boundary of cur_range, there is no need to check this range. + If the key has already been "moved" by a successful call to + ha_index_read_map, and the current value for the max argument + comes before the range, there is no need to check this range. */ - if (range_idx != min_max_ranges.elements && + if (!result && !(cur_range->flag & NO_MIN_RANGE) && (key_cmp(min_max_arg_part, (const uchar*) cur_range->min_key, min_max_arg_len) == -1)) diff -Nru mariadb-10.11.11/sql/opt_range.h mariadb-10.11.13/sql/opt_range.h --- mariadb-10.11.11/sql/opt_range.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/opt_range.h 2025-05-19 16:14:25.000000000 +0000 @@ -39,6 +39,32 @@ class JOIN; class Item_sum; +/* + When processing an OR clause with more than MAX_OR_ELEMENTS_FOR_INDEX_MERGE + disjuncts (i.e. OR-parts), do not construct index_merge plans from it. + + Some users have OR clauses with an extremely large number of disjuncts, like: + + (key1=1 AND key2=10) OR + (key1=2 AND key2=20) OR + (key1=3 AND key2=30) OR + ... + + When processing this, the optimizer would try to build a lot of potential + index_merge plans. Hypothetically this could be useful as the cheapest plan + could be to pick a specific index for each disjunct and build: + + index_merge(key1 IN (1,3,8,15...), key2 IN (20, 40, 50 ...)) + + In practice this causes a combinatorial amount of time to be spent in the range + analyzer, and most variants will be discarded when the range optimizer tries + to avoid this combinatorial explosion (which may or may not work depending on + the form of the WHERE clause). + In practice, very long ORs are served well enough by just considering range + accesses on individual indexes. +*/ +const int MAX_OR_ELEMENTS_FOR_INDEX_MERGE=100; + struct KEY_PART { uint16 key,part; /* See KEY_PART_INFO for meaning of the next two: */ @@ -889,6 +915,9 @@ */ bool remove_false_where_parts; + /* If TRUE, do not construct index_merge plans */ + bool disable_index_merge_plans; + /* Which functions should give SQL notes for unusable keys.
*/ diff -Nru mariadb-10.11.11/sql/rpl_injector.h mariadb-10.11.13/sql/rpl_injector.h --- mariadb-10.11.11/sql/rpl_injector.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/rpl_injector.h 2025-05-19 16:14:25.000000000 +0000 @@ -146,7 +146,6 @@ }; transaction() : m_thd(NULL) { } - transaction(transaction const&); ~transaction(); /* Clear transaction, i.e., make calls to 'good()' return false. */ diff -Nru mariadb-10.11.11/sql/rpl_mi.cc mariadb-10.11.13/sql/rpl_mi.cc --- mariadb-10.11.11/sql/rpl_mi.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/rpl_mi.cc 2025-05-19 16:14:25.000000000 +0000 @@ -21,6 +21,7 @@ #include "slave.h" #include "strfunc.h" #include "sql_repl.h" +#include #ifdef HAVE_REPLICATION @@ -1369,27 +1370,21 @@ Sql_condition::enum_warning_level warning) { Master_info *mi; - char buff[MAX_CONNECTION_NAME+1], *res; - size_t buff_length; DBUG_ENTER("get_master_info"); DBUG_PRINT("enter", ("connection_name: '%.*s'", (int) connection_name->length, connection_name->str)); - /* Make name lower case for comparison */ - res= strmake(buff, connection_name->str, connection_name->length); - my_casedn_str(system_charset_info, buff); - buff_length= (size_t) (res-buff); - + if (!connection_name->str) + connection_name= &empty_clex_str; mi= (Master_info*) my_hash_search(&master_info_hash, - (uchar*) buff, buff_length); + (uchar*) connection_name->str, + connection_name->length); if (!mi && warning != Sql_condition::WARN_LEVEL_NOTE) { my_error(WARN_NO_MASTER_INFO, - MYF(warning == Sql_condition::WARN_LEVEL_WARN ? ME_WARNING : - 0), - (int) connection_name->length, - connection_name->str); + MYF(warning == Sql_condition::WARN_LEVEL_WARN ? ME_WARNING : 0), + (int) connection_name->length, connection_name->str); } DBUG_RETURN(mi); } @@ -2074,4 +2069,52 @@ DBUG_RETURN(result); } +void setup_mysql_connection_for_master(MYSQL *mysql, Master_info *mi, + uint timeout) +{ + DBUG_ASSERT(mi); + DBUG_ASSERT(mi->mysql); + mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &timeout); + mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &timeout); + +#ifdef HAVE_OPENSSL + if (mi->ssl) + { + mysql_ssl_set(mysql, + mi->ssl_key[0]?mi->ssl_key:0, + mi->ssl_cert[0]?mi->ssl_cert:0, + mi->ssl_ca[0]?mi->ssl_ca:0, + mi->ssl_capath[0]?mi->ssl_capath:0, + mi->ssl_cipher[0]?mi->ssl_cipher:0); + mysql_options(mysql, MYSQL_OPT_SSL_CRL, + mi->ssl_crl[0] ? mi->ssl_crl : 0); + mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH, + mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0); + mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT, + &mi->ssl_verify_server_cert); + } +#endif + + /* + If server's default charset is not supported (like utf16, utf32) as client + charset, then set client charset to 'latin1' (default client charset). + */ + if (is_supported_parser_charset(default_charset_info)) + mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->cs_name.str); + else + { + sql_print_information("'%s' can not be used as client character set. 
" + "'%s' will be used as default client character set " + "while connecting to master.", + default_charset_info->cs_name.str, + default_client_charset_info->cs_name.str); + mysql_options(mysql, MYSQL_SET_CHARSET_NAME, + default_client_charset_info->cs_name.str); + } + + /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */ + if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr) + mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr); +} + #endif /* HAVE_REPLICATION */ diff -Nru mariadb-10.11.11/sql/rpl_mi.h mariadb-10.11.13/sql/rpl_mi.h --- mariadb-10.11.11/sql/rpl_mi.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/rpl_mi.h 2025-05-19 16:14:25.000000000 +0000 @@ -487,5 +487,16 @@ uint any_slave_sql_running(bool already_locked); bool give_error_if_slave_running(bool already_lock); +/* + Sets up the basic options for a MYSQL connection, mysql, to connect to the + primary server described by the Master_info parameter, mi. The timeout must + be passed explicitly, as different types of connections created by the slave + will use different values. + + Assumes mysql_init() has already been called on the mysql connection object. +*/ +void setup_mysql_connection_for_master(MYSQL *mysql, Master_info *mi, + uint timeout); + #endif /* HAVE_REPLICATION */ #endif /* RPL_MI_H */ diff -Nru mariadb-10.11.11/sql/rpl_parallel.cc mariadb-10.11.13/sql/rpl_parallel.cc --- mariadb-10.11.11/sql/rpl_parallel.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/rpl_parallel.cc 2025-05-19 16:14:25.000000000 +0000 @@ -124,8 +124,8 @@ else if (cmp == 0 && rli->group_master_log_pos < qev->future_event_master_log_pos) rli->group_master_log_pos= qev->future_event_master_log_pos; - mysql_mutex_unlock(&rli->data_lock); mysql_cond_broadcast(&rli->data_cond); + mysql_mutex_unlock(&rli->data_lock); } @@ -153,14 +153,12 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, rpl_parallel_entry *entry, rpl_group_info *rgi) { - THD *thd= rpt->thd; - wait_for_commit *wfc= &rgi->commit_orderer; - int err; - if (rgi->get_finish_event_group_called()) return; - thd->get_stmt_da()->set_overwrite_status(true); + THD *thd= rpt->thd; + wait_for_commit *wfc= &rgi->commit_orderer; + int err; if (unlikely(rgi->worker_error)) { @@ -320,10 +318,6 @@ wait_for_pending_deadlock_kill(thd, rgi); thd->clear_error(); thd->reset_killed(); - /* - Would do thd->get_stmt_da()->set_overwrite_status(false) here, but - reset_diagnostics_area() already does that. - */ thd->get_stmt_da()->reset_diagnostics_area(); wfc->wakeup_subsequent_commits(rgi->worker_error); rgi->did_mark_start_commit= false; @@ -1597,9 +1591,7 @@ else { delete qev->ev; - thd->get_stmt_da()->set_overwrite_status(true); err= thd->wait_for_prior_commit(); - thd->get_stmt_da()->set_overwrite_status(false); } end_of_group= diff -Nru mariadb-10.11.11/sql/semisync_master.cc mariadb-10.11.13/sql/semisync_master.cc --- mariadb-10.11.11/sql/semisync_master.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/semisync_master.cc 2025-05-19 16:14:25.000000000 +0000 @@ -565,12 +565,14 @@ { lock(); DBUG_ASSERT(rpl_semi_sync_master_clients > 0); - if (!(--rpl_semi_sync_master_clients) && !rpl_semi_sync_master_wait_no_slave) + if (!(--rpl_semi_sync_master_clients) && !rpl_semi_sync_master_wait_no_slave + && get_master_enabled()) { /* Signal transactions waiting in commit_trx() that they do not have to wait anymore. 
*/ + DBUG_ASSERT(m_active_tranxs); m_active_tranxs->clear_active_tranx_nodes(NULL, 0, signal_waiting_transaction); } diff -Nru mariadb-10.11.11/sql/semisync_slave.cc mariadb-10.11.13/sql/semisync_slave.cc --- mariadb-10.11.11/sql/semisync_slave.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/semisync_slave.cc 2025-05-19 16:14:25.000000000 +0000 @@ -141,7 +141,7 @@ DBUG_ASSERT(!debug_sync_set_action(mi->io_thd, STRING_WITH_LEN(act))); };); #endif - kill_connection(mi->mysql); + kill_connection(mi); } set_slave_enabled(0); @@ -158,8 +158,9 @@ } -void Repl_semi_sync_slave::kill_connection(MYSQL *mysql) +void Repl_semi_sync_slave::kill_connection(Master_info *mi) { + MYSQL *mysql= mi->mysql; if (!mysql) return; @@ -168,8 +169,8 @@ size_t kill_buffer_length; kill_mysql = mysql_init(kill_mysql); - mysql_options(kill_mysql, MYSQL_OPT_CONNECT_TIMEOUT, &m_kill_conn_timeout); - mysql_options(kill_mysql, MYSQL_OPT_READ_TIMEOUT, &m_kill_conn_timeout); + + setup_mysql_connection_for_master(kill_mysql, mi, m_kill_conn_timeout); mysql_options(kill_mysql, MYSQL_OPT_WRITE_TIMEOUT, &m_kill_conn_timeout); bool ret= (!mysql_real_connect(kill_mysql, mysql->host, diff -Nru mariadb-10.11.11/sql/semisync_slave.h mariadb-10.11.13/sql/semisync_slave.h --- mariadb-10.11.11/sql/semisync_slave.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/semisync_slave.h 2025-05-19 16:14:25.000000000 +0000 @@ -92,7 +92,7 @@ void slave_stop(Master_info *mi); void slave_reconnect(Master_info *mi); int request_transmit(Master_info *mi); - void kill_connection(MYSQL *mysql); + void kill_connection(Master_info *mi); private: /* True when init_object has been called */ diff -Nru mariadb-10.11.11/sql/signal_handler.cc mariadb-10.11.13/sql/signal_handler.cc --- mariadb-10.11.11/sql/signal_handler.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/signal_handler.cc 2025-05-19 16:14:25.000000000 +0000 @@ -277,7 +277,7 @@ my_safe_printf_stderr("Status: %s\n", kreason); my_safe_printf_stderr("Query (%p): ", thd->query()); my_safe_print_str(thd->query(), MY_MIN(65536U, thd->query_length())); - my_safe_printf_stderr("%s", "Optimizer switch: "); + my_safe_printf_stderr("%s", "\nOptimizer switch: "); ulonglong optsw= thd->variables.optimizer_switch; for (uint i= 0; optimizer_switch_names[i+1]; i++, optsw >>= 1) { diff -Nru mariadb-10.11.11/sql/slave.cc mariadb-10.11.13/sql/slave.cc --- mariadb-10.11.11/sql/slave.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/slave.cc 2025-05-19 16:14:25.000000000 +0000 @@ -3213,21 +3213,23 @@ if (full) protocol->store(mi->connection_name.str, mi->connection_name.length, &my_charset_bin); + mysql_mutex_lock(&mi->run_lock); + THD *sql_thd= mi->rli.sql_driver_thd; + const char *slave_sql_running_state= + sql_thd ? sql_thd->get_proc_info() : ""; + THD *io_thd= mi->io_thd; + const char *slave_io_running_state= io_thd ? io_thd->get_proc_info() : ""; + mysql_mutex_unlock(&mi->run_lock); + if (full) - { /* Show what the sql driver replication thread is doing This is only meaningful if there is only one slave thread. */ - msg= (mi->rli.sql_driver_thd ? - mi->rli.sql_driver_thd->get_proc_info() : ""); - protocol->store_string_or_null(msg, &my_charset_bin); - } - msg= mi->io_thd ? 
mi->io_thd->get_proc_info() : ""; - protocol->store_string_or_null(msg, &my_charset_bin); + protocol->store_string_or_null(slave_sql_running_state, &my_charset_bin); - mysql_mutex_unlock(&mi->run_lock); + protocol->store_string_or_null(slave_io_running_state, &my_charset_bin); mysql_mutex_lock(&mi->data_lock); mysql_mutex_lock(&mi->rli.data_lock); @@ -3401,10 +3403,6 @@ protocol->store((uint32) mi->rli.get_sql_delay()); // SQL_Remaining_Delay - // THD::proc_info is not protected by any lock, so we read it once - // to ensure that we use the same value throughout this function. - const char *slave_sql_running_state= - mi->rli.sql_driver_thd ? mi->rli.sql_driver_thd->proc_info : ""; if (slave_sql_running_state == stage_sql_thd_waiting_until_delay.m_name) { time_t t= my_time(0), sql_delay_end= mi->rli.get_sql_delay_end(); @@ -5485,6 +5483,7 @@ THD *thd; /* needs to be first for thread_stack */ char saved_log_name[FN_REFLEN]; char saved_master_log_name[FN_REFLEN]; + bool thd_initialized= 0; my_off_t UNINIT_VAR(saved_log_pos); my_off_t UNINIT_VAR(saved_master_log_pos); String saved_skip_gtid_pos; @@ -5587,6 +5586,7 @@ thd->variables.alter_algorithm= (ulong) Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT; server_threads.insert(thd); + thd_initialized= 1; /* We are going to set slave_running to 1. Assuming slave I/O thread is alive and connected, this is going to make Seconds_Behind_Master be 0 @@ -5966,7 +5966,7 @@ } THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit); thd->add_status_to_global(); - server_threads.erase(thd); + THD_STAGE_INFO(thd, stage_slave_sql_cleanup); mysql_mutex_lock(&rli->run_lock); err_during_init: @@ -5980,9 +5980,9 @@ rli->relay_log.description_event_for_exec= 0; rli->reset_inuse_relaylog(); /* Wake up master_pos_wait() */ - mysql_mutex_unlock(&rli->data_lock); DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions")); mysql_cond_broadcast(&rli->data_cond); + mysql_mutex_unlock(&rli->data_lock); rli->ignore_log_space_limit= 0; /* don't need any lock */ /* we die so won't remember charset - re-update them on next thread start */ thd->system_thread_info.rpl_sql_info->cached_charset_invalidate(); @@ -6037,6 +6037,8 @@ rpl_parallel_resize_pool_if_no_slaves(); delete serial_rgi; + if (thd_initialized) + server_threads.erase(thd); delete thd; DBUG_LEAVE; // Must match DBUG_ENTER() @@ -7616,50 +7618,10 @@ if (opt_slave_compressed_protocol) client_flag|= CLIENT_COMPRESS; /* We will use compression */ - mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout); - mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout); + setup_mysql_connection_for_master(mi->mysql, mi, slave_net_timeout); mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY, (char*) &my_true); -#ifdef HAVE_OPENSSL - if (mi->ssl) - { - mysql_ssl_set(mysql, - mi->ssl_key[0]?mi->ssl_key:0, - mi->ssl_cert[0]?mi->ssl_cert:0, - mi->ssl_ca[0]?mi->ssl_ca:0, - mi->ssl_capath[0]?mi->ssl_capath:0, - mi->ssl_cipher[0]?mi->ssl_cipher:0); - mysql_options(mysql, MYSQL_OPT_SSL_CRL, - mi->ssl_crl[0] ? mi->ssl_crl : 0); - mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH, - mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0); - mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT, - &mi->ssl_verify_server_cert); - } -#endif - - /* - If server's default charset is not supported (like utf16, utf32) as client - charset, then set client charset to 'latin1' (default client charset). 
- */ - if (is_supported_parser_charset(default_charset_info)) - mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->cs_name.str); - else - { - sql_print_information("'%s' can not be used as client character set. " - "'%s' will be used as default client character set " - "while connecting to master.", - default_charset_info->cs_name.str, - default_client_charset_info->cs_name.str); - mysql_options(mysql, MYSQL_SET_CHARSET_NAME, - default_client_charset_info->cs_name.str); - } - - /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */ - if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr) - mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr); - /* we disallow empty users */ if (mi->user[0] == 0) { diff -Nru mariadb-10.11.11/sql/sp_head.cc mariadb-10.11.13/sql/sp_head.cc --- mariadb-10.11.11/sql/sp_head.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sp_head.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1531,7 +1531,7 @@ thd->wsrep_cs().reset_error(); /* Reset also thd->killed if it has been set during BF abort. */ if (killed_mask_hard(thd->killed) == KILL_QUERY) - thd->killed= NOT_KILLED; + thd->reset_killed(); /* if failed transaction was not replayed, must return with error from here */ if (!must_replay) err_status = 1; } @@ -2552,6 +2552,16 @@ if (!spvar) DBUG_RETURN(FALSE); + if (!spvar->field_def.type_handler()->is_scalar_type() && + dynamic_cast<Item_param*>(arg_item)) + { + // Item_param cannot store values of non-scalar data types yet + my_error(ER_ILLEGAL_PARAMETER_DATA_TYPE_FOR_OPERATION, MYF(0), + spvar->field_def.type_handler()->name().ptr(), + "EXECUTE ... USING ?"); + DBUG_RETURN(true); + } + if (spvar->mode != sp_variable::MODE_IN) { Settable_routine_parameter *srp= diff -Nru mariadb-10.11.11/sql/sql_acl.cc mariadb-10.11.13/sql/sql_acl.cc --- mariadb-10.11.11/sql/sql_acl.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_acl.cc 2025-05-19 16:14:25.000000000 +0000 @@ -8433,19 +8433,13 @@ /* If sequence is used as part of NEXT VALUE, PREVIOUS VALUE or SELECT, - we need to modify the requested access rights depending on how the - sequence is used. + the privilege will be checked in ::fix_fields(). + Direct SELECT of a sequence table doesn't set t_ref->sequence, so + privileges will be checked normally, as for any table. */ if (t_ref->sequence && !(want_access & ~(SELECT_ACL | INSERT_ACL | UPDATE_ACL | DELETE_ACL))) - { - /* - We want to have either SELECT or INSERT rights to sequences depending - on how they are accessed - */ - orig_want_access= ((t_ref->lock_type >= TL_FIRST_WRITE) ?
- INSERT_ACL : SELECT_ACL); - } + continue; const ACL_internal_table_access *access= get_cached_table_access(&t_ref->grant.m_internal, @@ -13111,6 +13105,9 @@ return dup; } + if (!initialized) + return dup; + if (lock) mysql_mutex_lock(&acl_cache->lock); if (find_acl_role(dup->user.str, false)) diff -Nru mariadb-10.11.11/sql/sql_base.cc mariadb-10.11.13/sql/sql_base.cc --- mariadb-10.11.11/sql/sql_base.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_base.cc 2025-05-19 16:14:25.000000000 +0000 @@ -19,6 +19,7 @@ #include "mariadb.h" #include "sql_base.h" // setup_table_map +#include "sql_list.h" #include "sql_priv.h" #include "unireg.h" #include "debug_sync.h" @@ -781,6 +782,7 @@ } } +#ifdef DBUG_ASSERT_EXISTS static inline bool check_field_pointers(const TABLE *table) { for (Field **pf= table->field; *pf; pf++) @@ -796,6 +798,7 @@ } return true; } +#endif int close_thread_tables_for_query(THD *thd) @@ -1173,7 +1176,6 @@ t_name= &table->table_name; t_alias= &table->alias; -retry: DBUG_PRINT("info", ("real table: %s.%s", d_name->str, t_name->str)); for (TABLE_LIST *tl= table_list; tl ; tl= tl->next_global, res= 0) { @@ -1235,28 +1237,53 @@ DBUG_PRINT("info", ("found same copy of table or table which we should skip")); } - if (res && res->belong_to_derived) - { - /* - We come here for queries of type: - INSERT INTO t1 (SELECT tmp.a FROM (select * FROM t1) as tmp); + DBUG_RETURN(res); +} - Try to fix by materializing the derived table - */ - TABLE_LIST *derived= res->belong_to_derived; - if (derived->is_merged_derived() && !derived->derived->is_excluded()) + +TABLE_LIST* unique_table_in_select_list(THD *thd, TABLE_LIST *table, SELECT_LEX *sel) +{ + subselect_table_finder_param param= {thd, table, NULL}; + List_iterator_fast<Item> it(sel->item_list); + Item *item; + while ((item= it++)) + { + if (item->walk(&Item::subselect_table_finder_processor, FALSE, &param)) { - DBUG_PRINT("info", - ("convert merged to materialization to resolve the conflict")); - derived->change_refs_to_fields(); - derived->set_materialized_derived(); - goto retry; + if (param.dup == NULL) + return ERROR_TABLE; + return param.dup; } + DBUG_ASSERT(param.dup == NULL); } - DBUG_RETURN(res); + return NULL; } +typedef TABLE_LIST* (*find_table_callback)(THD *thd, TABLE_LIST *table, + TABLE_LIST *table_list, + uint check_flag, SELECT_LEX *sel); + +static +TABLE_LIST* +find_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list, + uint check_flag, SELECT_LEX *sel, find_table_callback callback ); + +TABLE_LIST* unique_table_callback(THD *thd, TABLE_LIST *table, + TABLE_LIST *table_list, + uint check_flag, SELECT_LEX *sel) +{ + return find_dup_table(thd, table, table_list, check_flag); +} + + +TABLE_LIST* unique_in_sel_table_callback(THD *thd, TABLE_LIST *table, + TABLE_LIST *table_list, + uint check_flag, SELECT_LEX *sel) +{ + return unique_table_in_select_list(thd, table, sel); +} + /** Test that the subject table of INSERT/UPDATE/DELETE/CREATE or (in case of MyISAMMRG) one of its children are not used later @@ -1276,6 +1303,25 @@ unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list, uint check_flag) { + return find_table(thd, table, table_list, check_flag, NULL, + &unique_table_callback); +} + + +TABLE_LIST* +unique_table_in_insert_returning_subselect(THD *thd, TABLE_LIST *table, SELECT_LEX *sel) +{ + return find_table(thd, table, NULL, 0, sel, + &unique_in_sel_table_callback); + +} + + +static +TABLE_LIST* +find_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list, + uint check_flag, SELECT_LEX
*sel, find_table_callback callback ) +{ TABLE_LIST *dup; table= table->find_table_for_update(); @@ -1306,12 +1352,12 @@ if (!tmp_parent) break; - if ((dup= find_dup_table(thd, child, child->next_global, check_flag))) + if ((dup= (*callback)(thd, child, child->next_global, check_flag, sel))) break; } } else - dup= find_dup_table(thd, table, table_list, check_flag); + dup= (*callback)(thd, table, table_list, check_flag, sel); return dup; } @@ -4561,6 +4607,7 @@ } thd->current_tablenr= 0; + sroutine_to_open= &thd->lex->sroutines_list.first; restart: /* @@ -4576,7 +4623,6 @@ has_prelocking_list= thd->lex->requires_prelocking(); table_to_open= start; - sroutine_to_open= &thd->lex->sroutines_list.first; *counter= 0; THD_STAGE_INFO(thd, stage_opening_tables); prelocking_strategy->reset(thd); @@ -4673,7 +4719,7 @@ elements from the table list (if MERGE tables are involved), */ close_tables_for_reopen(thd, start, ot_ctx.start_of_statement_svp(), - ot_ctx.remove_implicitly_used_deps()); + false); /* Here we rely on the fact that 'tables' still points to the valid @@ -4741,10 +4787,9 @@ /* F.ex. deadlock happened */ if (ot_ctx.can_recover_from_failed_open()) { - DBUG_ASSERT(ot_ctx.remove_implicitly_used_deps()); close_tables_for_reopen(thd, start, ot_ctx.start_of_statement_svp(), - ot_ctx.remove_implicitly_used_deps()); + true); if (ot_ctx.recover_from_failed_open()) goto error; @@ -4753,6 +4798,7 @@ goto error; error= FALSE; + sroutine_to_open= &thd->lex->sroutines_list.first; goto restart; } /* @@ -6034,19 +6080,19 @@ trying to reopen tables. NULL if no metadata locks were held and thus all metadata locks should be released. - @param[in] remove_implicit_deps True in case routines and tables implicitly + @param[in] remove_indirect True in case routines and tables implicitly used by a statement should be removed. */ void close_tables_for_reopen(THD *thd, TABLE_LIST **tables, const MDL_savepoint &start_of_statement_svp, - bool remove_implicit_deps) + bool remove_indirect) { - TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table(); TABLE_LIST *tmp; - if (remove_implicit_deps) + if (remove_indirect) { + TABLE_LIST *first_not_own_table= thd->lex->first_not_own_table(); /* If table list consists only from tables from prelocking set, table list for new attempt should be empty, so we have to update list's root pointer. @@ -7412,82 +7458,83 @@ if (!found) continue; // No matching field + /* Restore field_2 to point to the field which was a match for field_1. */ + field_2= nj_col_2->field(); + /* field_1 and field_2 have the same names. Check if they are in the USING clause (if present), mark them as common fields, and add a new equi-join condition to the ON clause. */ - if (nj_col_2) - { - /* - Create non-fixed fully qualified field and let fix_fields to - resolve it. - */ - Item *item_1= nj_col_1->create_item(thd); - Item *item_2= nj_col_2->create_item(thd); - Item_ident *item_ident_1, *item_ident_2; - Item_func_eq *eq_cond; - if (!item_1 || !item_2) - goto err; // out of memory + /* + Create non-fixed fully qualified field and let fix_fields to + resolve it. + */ + Item *item_1= nj_col_1->create_item(thd); + Item *item_2= nj_col_2->create_item(thd); + Item_ident *item_ident_1, *item_ident_2; + Item_func_eq *eq_cond; - /* - The following assert checks that the two created items are of - type Item_ident. - */ - DBUG_ASSERT(!thd->lex->current_select->no_wrap_view_item); - /* - In the case of no_wrap_view_item == 0, the created items must be - of sub-classes of Item_ident. 
- */ - DBUG_ASSERT(item_1->type() == Item::FIELD_ITEM || - item_1->type() == Item::REF_ITEM); - DBUG_ASSERT(item_2->type() == Item::FIELD_ITEM || - item_2->type() == Item::REF_ITEM); + if (!item_1 || !item_2) + goto err; // out of memory - /* - We need to cast item_1,2 to Item_ident, because we need to hook name - resolution contexts specific to each item. - */ - item_ident_1= (Item_ident*) item_1; - item_ident_2= (Item_ident*) item_2; - /* - Create and hook special name resolution contexts to each item in the - new join condition . We need this to both speed-up subsequent name - resolution of these items, and to enable proper name resolution of - the items during the execute phase of PS. - */ - if (set_new_item_local_context(thd, item_ident_1, nj_col_1->table_ref) || - set_new_item_local_context(thd, item_ident_2, nj_col_2->table_ref)) - goto err; + /* + The following assert checks that the two created items are of + type Item_ident. + */ + DBUG_ASSERT(!thd->lex->current_select->no_wrap_view_item); + /* + In the case of no_wrap_view_item == 0, the created items must be + of sub-classes of Item_ident. + */ + DBUG_ASSERT(item_1->type() == Item::FIELD_ITEM || + item_1->type() == Item::REF_ITEM); + DBUG_ASSERT(item_2->type() == Item::FIELD_ITEM || + item_2->type() == Item::REF_ITEM); - if (!(eq_cond= new (thd->mem_root) Item_func_eq(thd, item_ident_1, item_ident_2))) - goto err; /* Out of memory. */ + /* + We need to cast item_1,2 to Item_ident, because we need to hook name + resolution contexts specific to each item. + */ + item_ident_1= (Item_ident*) item_1; + item_ident_2= (Item_ident*) item_2; + /* + Create and hook special name resolution contexts to each item in the + new join condition . We need this to both speed-up subsequent name + resolution of these items, and to enable proper name resolution of + the items during the execute phase of PS. + */ + if (set_new_item_local_context(thd, item_ident_1, nj_col_1->table_ref) || + set_new_item_local_context(thd, item_ident_2, nj_col_2->table_ref)) + goto err; - /* - Add the new equi-join condition to the ON clause. Notice that - fix_fields() is applied to all ON conditions in setup_conds() - so we don't do it here. - */ - add_join_on(thd, (table_ref_1->outer_join & JOIN_TYPE_RIGHT ? - table_ref_1 : table_ref_2), - eq_cond); - - nj_col_1->is_common= nj_col_2->is_common= TRUE; - DBUG_PRINT ("info", ("%s.%s and %s.%s are common", - nj_col_1->safe_table_name(), - nj_col_1->name()->str, - nj_col_2->safe_table_name(), - nj_col_2->name()->str)); - - if (field_1) - update_field_dependencies(thd, field_1, field_1->table); - if (field_2) - update_field_dependencies(thd, field_2, field_2->table); + if (!(eq_cond= new (thd->mem_root) Item_func_eq(thd, item_ident_1, item_ident_2))) + goto err; /* Out of memory. */ - if (using_fields != NULL) - ++(*found_using_fields); - } + /* + Add the new equi-join condition to the ON clause. Notice that + fix_fields() is applied to all ON conditions in setup_conds() + so we don't do it here. + */ + add_join_on(thd, (table_ref_1->outer_join & JOIN_TYPE_RIGHT ? 
+ table_ref_1 : table_ref_2), + eq_cond); + + nj_col_1->is_common= nj_col_2->is_common= TRUE; + DBUG_PRINT ("info", ("%s.%s and %s.%s are common", + nj_col_1->safe_table_name(), + nj_col_1->name()->str, + nj_col_2->safe_table_name(), + nj_col_2->name()->str)); + + if (field_1) + update_field_dependencies(thd, field_1, field_1->table); + if (field_2) + update_field_dependencies(thd, field_2, field_2->table); + + if (using_fields != NULL) + ++(*found_using_fields); } if (leaf_1) leaf_1->is_join_columns_complete= TRUE; @@ -8392,7 +8439,7 @@ if (table_list->belong_to_view && !table_list->view && check_single_table_access(thd, access, table_list, FALSE)) { - tables->hide_view_error(thd); + tables->replace_view_error_with_generic(thd); DBUG_RETURN(TRUE); } access= want_access; @@ -8897,14 +8944,15 @@ } -static void unwind_stored_field_offsets(const List &fields, Field *end) +static void unwind_stored_field_offsets(const List &fields, Item_field *end) { - for (Item &item_field: fields) + for (Item &item: fields) { - Field *f= item_field.field_for_view_update()->field; - if (f == end) + Item_field *item_field= item.field_for_view_update(); + if (item_field == end) break; + Field *f= item_field->field; if (f->stored_in_db()) { TABLE *table= f->table; @@ -8948,7 +8996,7 @@ { List_iterator_fast f(fields),v(values); Item *value, *fld; - Item_field *field; + Item_field *field= NULL; Field *rfield; TABLE *table; bool only_unvers_fields= update && table_arg->versioned(); @@ -8966,11 +9014,8 @@ while ((fld= f++)) { - if (!(field= fld->field_for_view_update())) - { - my_error(ER_NONUPDATEABLE_COLUMN, MYF(0), fld->name.str); - goto err_unwind_fields; - } + field= fld->field_for_view_update(); + DBUG_ASSERT(field); // ensured by check_fields or check_view_insertability. 
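/*
  Aside: a minimal self-contained sketch (not server code; all names here are
  hypothetical) of the unwind pattern fill_record() relies on above. The last
  Item_field processed acts as the sentinel: on a failed assignment under
  MODE_SIMULTANEOUS_ASSIGNMENT, every slot stored before the sentinel is
  restored to its saved value.

    #include <vector>

    struct Slot { int value; int saved; };

    // Returns true when all assignments succeed; otherwise restores the
    // slots modified before the failing one (the sentinel) and returns false.
    static bool assign_all(std::vector<Slot> &slots,
                           const std::vector<int> &vals)
    {
      size_t sentinel= slots.size();
      for (size_t i= 0; i < slots.size(); i++)
      {
        slots[i].saved= slots[i].value;   // remember old value for unwind
        if (vals[i] < 0)                  // simulated store error
        {
          sentinel= i;                    // first slot that was NOT stored
          break;
        }
        slots[i].value= vals[i];
      }
      if (sentinel == slots.size())
        return true;
      for (size_t i= 0; i < sentinel; i++)
        slots[i].value= slots[i].saved;   // unwind up to the sentinel
      return false;
    }
*/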
value=v++; DBUG_ASSERT(value); rfield= field->field; @@ -9038,7 +9083,7 @@ DBUG_RETURN(thd->is_error()); err_unwind_fields: if (update && thd->variables.sql_mode & MODE_SIMULTANEOUS_ASSIGNMENT) - unwind_stored_field_offsets(fields, rfield); + unwind_stored_field_offsets(fields, field); err: DBUG_PRINT("error",("got error")); thd->abort_on_warning= save_abort_on_warning; @@ -9407,9 +9452,11 @@ memcpy(path_copy, path, path_len - ext_len); path_copy[path_len - ext_len]= 0; init_tmp_table_share(thd, &share, "", 0, "", path_copy); - handlerton *ht= share.db_type(); if (!open_table_def(thd, &share)) - ht->drop_table(share.db_type(), path_copy); + { + handlerton *ht= share.db_type(); + ht->drop_table(ht, path_copy); + } free_table_share(&share); } /* diff -Nru mariadb-10.11.11/sql/sql_base.h mariadb-10.11.13/sql/sql_base.h --- mariadb-10.11.11/sql/sql_base.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_base.h 2025-05-19 16:14:25.000000000 +0000 @@ -157,7 +157,7 @@ my_bool mysql_rm_tmp_tables(void); void close_tables_for_reopen(THD *thd, TABLE_LIST **tables, const MDL_savepoint &start_of_statement_svp, - bool remove_implicit_dependencies); + bool remove_indirect); bool table_already_fk_prelocked(TABLE_LIST *tl, LEX_CSTRING *db, LEX_CSTRING *table, thr_lock_type lock_type); TABLE_LIST *find_table_in_list(TABLE_LIST *table, @@ -296,6 +296,8 @@ bool lock_tables(THD *thd, TABLE_LIST *tables, uint counter, uint flags); int decide_logging_format(THD *thd, TABLE_LIST *tables); void close_thread_table(THD *thd, TABLE **table_ptr); +TABLE_LIST* +unique_table_in_insert_returning_subselect(THD *thd, TABLE_LIST *table, SELECT_LEX *sel); TABLE_LIST *unique_table(THD *thd, TABLE_LIST *table, TABLE_LIST *table_list, uint check_flag); bool is_equal(const LEX_CSTRING *a, const LEX_CSTRING *b); @@ -568,23 +570,6 @@ return m_timeout; } - /** - Return true in case tables and routines the statement implicilty - dependent on should be removed, else return false. - - @note The use case when routines and tables the statement implicitly - dependent on shouldn't be removed is the one when a new partition be - created on handling the INSERT statement against a versioning partitioned - table. For this case re-opening a versioning table would result in adding - implicitly dependent routines (e.g. table's triggers) that lead to - allocation of memory on PS mem_root and so leaking a memory until the PS - statement be deallocated. 
- bool remove_implicitly_used_deps() const - { - return m_action != OT_ADD_HISTORY_PARTITION; - } - uint get_flags() const { return m_flags; } /** diff -Nru mariadb-10.11.11/sql/sql_cache.cc mariadb-10.11.13/sql/sql_cache.cc --- mariadb-10.11.11/sql/sql_cache.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_cache.cc 2025-05-19 16:14:25.000000000 +0000 @@ -3553,6 +3553,7 @@ if (table_block == 0) { DBUG_PRINT("qcache", ("Can't write table name to cache")); + node->parent= NULL; DBUG_RETURN(0); } Query_cache_table *header= table_block->table(); @@ -3576,6 +3577,7 @@ DBUG_PRINT("qcache", ("Can't insert table to hash")); // write_block_data return locked block free_memory_block(table_block); + node->parent= NULL; DBUG_RETURN(0); } char *db= header->db(); diff -Nru mariadb-10.11.11/sql/sql_class.cc mariadb-10.11.13/sql/sql_class.cc --- mariadb-10.11.11/sql/sql_class.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_class.cc 2025-05-19 16:14:25.000000000 +0000 @@ -8381,6 +8381,24 @@ } + +void +wait_for_commit::prior_commit_error(THD *thd) +{ + /* + Only raise a "prior commit failed" error if we didn't already raise + an error. + + The ER_PRIOR_COMMIT_FAILED is just an internal mechanism to ensure that a + transaction does not commit successfully if a prior commit failed, so that + the parallel replication worker threads stop in an orderly fashion when + one of them get an error. Thus, if another worker already got another real + error, overriding it with ER_PRIOR_COMMIT_FAILED is not useful. + */ + if (!thd->get_stmt_da()->is_set()) + my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); +} + + /* Wakeup anyone waiting for us to have committed. diff -Nru mariadb-10.11.11/sql/sql_class.h mariadb-10.11.13/sql/sql_class.h --- mariadb-10.11.11/sql/sql_class.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_class.h 2025-05-19 16:14:25.000000000 +0000 @@ -2383,8 +2383,8 @@ return wait_for_prior_commit2(thd, allow_kill); else { - if (wakeup_error) - my_error(ER_PRIOR_COMMIT_FAILED, MYF(0)); + if (unlikely(wakeup_error)) + prior_commit_error(thd); return wakeup_error; } } @@ -2435,6 +2435,7 @@ void wakeup(int wakeup_error); int wait_for_prior_commit2(THD *thd, bool allow_kill); + void prior_commit_error(THD *thd); void wakeup_subsequent_commits2(int wakeup_error); void unregister_wait_for_prior_commit2(); diff -Nru mariadb-10.11.11/sql/sql_cmd.h mariadb-10.11.13/sql/sql_cmd.h --- mariadb-10.11.11/sql/sql_cmd.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_cmd.h 2025-05-19 16:14:25.000000000 +0000 @@ -141,6 +141,7 @@ handlerton **ha, bool tmp_table); bool is_set() { return m_storage_engine_name.str != NULL; } + const LEX_CSTRING *name() const { return &m_storage_engine_name; } }; diff -Nru mariadb-10.11.11/sql/sql_db.cc mariadb-10.11.13/sql/sql_db.cc --- mariadb-10.11.11/sql/sql_db.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_db.cc 2025-05-19 16:14:25.000000000 +0000 @@ -536,36 +536,53 @@ DESCRIPTION + create->default_table_charset is guaranteed to be always set + Required by some callers + RETURN VALUES 0 File found - 1 No database file or could not open it - + -1 No database file (file was not found or 'empty' file was cached) + 1 Could not open it */ -bool load_db_opt(THD *thd, const char *path, Schema_specification_st *create) +int load_db_opt(THD *thd, const char *path, Schema_specification_st *create) { File file; char buf[256+DATABASE_COMMENT_MAXLEN]; DBUG_ENTER("load_db_opt"); - bool error=1; + int error= 0;
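/*
  Aside: the new contract is easiest to see in isolation. A hedged sketch of
  the negative-caching idea, with illustrative names only (std::map stands in
  for the dboptions hash): a missing db.opt is remembered as an "empty" cache
  entry, so later lookups return -1 without retrying the file open, while a
  genuine read error (open succeeded, read failed) returns 1 and is not cached.

    #include <fstream>
    #include <map>
    #include <optional>
    #include <string>

    static std::map<std::string, std::optional<std::string>> opt_cache;

    // 0: found, -1: no options file (possibly cached), 1: could not read it
    static int load_opt(const std::string &path, std::string *out)
    {
      auto it= opt_cache.find(path);
      if (it != opt_cache.end())
      {
        if (!it->second)
          return -1;                   // cached "file does not exist"
        *out= *it->second;
        return 0;
      }
      std::ifstream f(path);
      if (!f.is_open())
      {
        opt_cache[path]= std::nullopt; // negative cache entry
        return -1;
      }
      std::string line;
      if (!std::getline(f, line))
        return 1;                      // read error: deliberately not cached
      opt_cache[path]= line;
      *out= line;
      return 0;
    }
*/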
size_t nbytes; myf utf8_flag= thd->get_utf8_flag(); bzero((char*) create,sizeof(*create)); - create->default_table_charset= thd->variables.collation_server; /* Check if options for this database are already in the hash */ if (!get_dbopt(thd, path, create)) - DBUG_RETURN(0); + { + if (!create->default_table_charset) + error= -1; // db.opt did not exist + goto err1; + } /* Otherwise, load options from the .opt file */ if ((file= mysql_file_open(key_file_dbopt, path, O_RDONLY | O_SHARE, MYF(0))) < 0) + { + /* + Create an empty entry, to avoid doing an extra file open for every create + table. + */ + put_dbopt(path, create); + error= -1; goto err1; + } IO_CACHE cache; if (init_io_cache(&cache, file, IO_SIZE, READ_CACHE, 0, 0, MYF(0))) - goto err2; + { + error= 1; + goto err2; // Not cached + } while ((int) (nbytes= my_b_gets(&cache, (char*) buf, sizeof(buf))) > 0) { @@ -586,7 +603,7 @@ default-collation commands. */ if (!(create->default_table_charset= - get_charset_by_csname(pos+1, MY_CS_PRIMARY, MYF(utf8_flag))) && + get_charset_by_csname(pos+1, MY_CS_PRIMARY, MYF(utf8_flag))) && !(create->default_table_charset= get_charset_by_name(pos+1, MYF(utf8_flag)))) { @@ -621,10 +638,11 @@ err2: mysql_file_close(file, MYF(0)); err1: + if (!create->default_table_charset) // In case of error + create->default_table_charset= thd->variables.collation_server; DBUG_RETURN(error); } - /* Retrieve database options by name. Load database options file or fetch from cache. @@ -651,11 +669,12 @@ db_create_info right after that. RETURN VALUES (read NOTE!) - FALSE Success - TRUE Failed to retrieve options + 0 File found + -1 No database file (file was not found or 'empty' file was cached) + 1 Could not open it */ -bool load_db_opt_by_name(THD *thd, const char *db_name, +int load_db_opt_by_name(THD *thd, const char *db_name, Schema_specification_st *db_create_info) { char db_opt_path[FN_REFLEN + 1]; @@ -1951,8 +1970,7 @@ build_table_filename(path, sizeof(path)-1, old_db->str, "", MY_DB_OPT_FILE, 0); - if ((load_db_opt(thd, path, &create_info))) - create_info.default_table_charset= thd->variables.collation_server; + load_db_opt(thd, path, &create_info); length= build_table_filename(path, sizeof(path)-1, old_db->str, "", "", 0); if (length && path[length-1] == FN_LIBCHAR) diff -Nru mariadb-10.11.11/sql/sql_db.h mariadb-10.11.13/sql/sql_db.h --- mariadb-10.11.11/sql/sql_db.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_db.h 2025-05-19 16:14:25.000000000 +0000 @@ -37,8 +37,8 @@ bool my_dboptions_cache_init(void); void my_dboptions_cache_free(void); bool check_db_dir_existence(const char *db_name); -bool load_db_opt(THD *thd, const char *path, Schema_specification_st *create); -bool load_db_opt_by_name(THD *thd, const char *db_name, +int load_db_opt(THD *thd, const char *path, Schema_specification_st *create); +int load_db_opt_by_name(THD *thd, const char *db_name, Schema_specification_st *db_create_info); CHARSET_INFO *get_default_db_collation(THD *thd, const char *db_name); bool my_dbopt_init(void); diff -Nru mariadb-10.11.11/sql/sql_error.cc mariadb-10.11.13/sql/sql_error.cc --- mariadb-10.11.11/sql/sql_error.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_error.cc 2025-05-19 16:14:25.000000000 +0000 @@ -318,18 +318,16 @@ #endif get_warning_info()->clear_error_condition(); set_is_sent(false); - /** Tiny reset in debug mode to see garbage right away */ - if (!is_bulk_op()) - /* - For BULK DML operations (e.g. UPDATE) the data member m_status - has the value DA_OK_BULK.
Keep this value in order to handle - m_affected_rows, m_statement_warn_count in correct way. Else, - the number of rows and the number of warnings affected by - the last statement executed as part of a trigger fired by the dml - (e.g. UPDATE statement fires a trigger on AFTER UPDATE) would counts - rows modified by trigger's statement. - */ - m_status= DA_EMPTY; + /* + For BULK DML operations (e.g. UPDATE) the data member m_status + has the value DA_OK_BULK. Keep this value in order to handle + m_affected_rows, m_statement_warn_count in correct way. Else, + the number of rows and the number of warnings affected by + the last statement executed as part of a trigger fired by the dml + (e.g. UPDATE statement fires a trigger on AFTER UPDATE) would count + rows modified by trigger's statement. + */ + m_status= is_bulk_op() ? DA_OK_BULK : DA_EMPTY; DBUG_VOID_RETURN; } diff -Nru mariadb-10.11.11/sql/sql_insert.cc mariadb-10.11.13/sql/sql_insert.cc --- mariadb-10.11.11/sql/sql_insert.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_insert.cc 2025-05-19 16:14:25.000000000 +0000 @@ -57,6 +57,7 @@ */ #include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */ +#include "sql_list.h" #include "sql_priv.h" #include "sql_insert.h" #include "sql_update.h" // compare_record @@ -728,6 +729,8 @@ Name_resolution_context_state ctx_state; SELECT_LEX *returning= thd->lex->has_returning() ? thd->lex->returning() : 0; unsigned char *readbuff= NULL; + List<List_item> insert_values_cache; + bool cache_insert_values= FALSE; #ifndef EMBEDDED_LIBRARY char *query= thd->query(); @@ -785,7 +788,7 @@ if ((res= mysql_prepare_insert(thd, table_list, fields, values, update_fields, update_values, duplic, ignore, - &unused_conds, FALSE))) + &unused_conds, FALSE, &cache_insert_values))) { retval= thd->is_error(); if (res < 0) @@ -1033,8 +1036,41 @@ if (returning) fix_rownum_pointers(thd, thd->lex->returning(), &info.accepted_rows); + if (cache_insert_values) + { + insert_values_cache.empty(); + while ((values= its++)) + { + List<Item> *caches= new (thd->mem_root) List_item; + List_iterator_fast<Item> iv(*values); + Item *item; + if (caches == 0) + { + error= 1; + goto values_loop_end; + } + caches->empty(); + while((item= iv++)) + { + Item_cache *cache= item->get_cache(thd); + if (!cache) + { + error= 1; + goto values_loop_end; + } + cache->setup(thd, item); + caches->push_back(cache); + } + insert_values_cache.push_back(caches); + } + its.rewind(); + } + do { + List_iterator_fast<List_item> itc(insert_values_cache); + List_iterator_fast<List_item> *itr; + DBUG_PRINT("info", ("iteration %llu", iteration)); if (iteration && bulk_parameters_set(thd)) { @@ -1042,7 +1078,24 @@ goto values_loop_end; } - while ((values= its++)) + if (cache_insert_values) + { + List_item *caches; + while ((caches= itc++)) + { + List_iterator_fast<Item> ic(*caches); + Item_cache *cache; + while((cache= (Item_cache*) ic++)) + { + cache->cache_value(); + } + } + itc.rewind(); + itr= &itc; + } + else + itr= &its; + while ((values= (*itr)++)) { thd->get_stmt_da()->inc_current_row_for_warning(); if (fields.elements || !value_count) @@ -1146,7 +1199,7 @@ break; info.accepted_rows++; } - its.rewind(); + itr->rewind(); iteration++; } while (bulk_parameters_iterations(thd)); @@ -1657,6 +1710,7 @@ table_list Global/local table list where Where clause (for insert ... select) select_insert TRUE if INSERT ... SELECT statement + cache_insert_values set if insert's VALUES(...)
has to be pre-computed TODO (in far future) In cases of: @@ -1679,7 +1733,7 @@ List<Item> &update_fields, List<Item> &update_values, enum_duplicates duplic, bool ignore, COND **where, - bool select_insert) + bool select_insert, bool * const cache_insert_values) { SELECT_LEX *select_lex= thd->lex->first_select_lex(); Name_resolution_context *context= &select_lex->context; @@ -1783,6 +1837,15 @@ thd->vers_insert_history(row_start); // check privileges } + /* + Check if we read from the same table we're inserting into. + Queries like INSERT INTO t1 VALUES ((SELECT ... FROM t1...)) have + to pre-compute the VALUES part. + Reading from the same table in the RETURNING clause is not allowed. + + INSERT...SELECT detects this case in select_insert::prepare and also + uses buffering to handle it correctly. + */ if (!select_insert) { Item *fake_conds= 0; TABLE_LIST *duplicate; if ((duplicate= unique_table(thd, table_list, table_list->next_global, CHECK_DUP_ALLOW_DIFFERENT_ALIAS))) { - update_non_unique_table_error(table_list, "INSERT", duplicate); - DBUG_RETURN(1); + /* + This is INSERT INTO ... VALUES (...) and it must pre-compute the + values to be inserted. + */ + (*cache_insert_values)= true; } + else + (*cache_insert_values)= false; + select_lex->fix_prepare_information(thd, &fake_conds, &fake_conds); + + if ((*cache_insert_values) && thd->lex->has_returning()) + { + // Check if the table we're inserting into is also in RETURNING clause + TABLE_LIST *dup= + unique_table_in_insert_returning_subselect(thd, table_list, + thd->lex->returning()); + if (dup) + { + if (dup != ERROR_TABLE) + update_non_unique_table_error(table_list, "INSERT", duplicate); + DBUG_RETURN(1); + } + } } /* Only call prepare_for_position() if we are not performing a DELAYED @@ -3930,6 +4013,7 @@ int res; LEX *lex= thd->lex; SELECT_LEX *select_lex= lex->first_select_lex(); + bool cache_insert_values= false; DBUG_ENTER("mysql_insert_select_prepare"); /* @@ -3940,7 +4024,7 @@ if ((res= mysql_prepare_insert(thd, lex->query_tables, lex->field_list, 0, lex->update_list, lex->value_list, lex->duplicates, lex->ignore, - &select_lex->where, TRUE))) + &select_lex->where, TRUE, &cache_insert_values))) DBUG_RETURN(res); /* @@ -4227,6 +4311,7 @@ int select_insert::prepare2(JOIN *) { DBUG_ENTER("select_insert::prepare2"); + switch_to_nullable_trigger_fields(*fields, table); if (table->validate_default_values_of_unset_fields(thd)) DBUG_RETURN(1); if (thd->lex->describe) DBUG_RETURN(1); @@ -4348,7 +4433,11 @@ bool select_insert::prepare_eof() { int error; - bool const trans_table= table->file->has_transactions_and_rollback(); + // make sure any ROW format pending event is logged in the same binlog cache + bool const trans_table= (thd->is_current_stmt_binlog_format_row() && table->file->row_logging) ?
+ table->file->row_logging_has_trans : + table->file->has_transactions_and_rollback(); bool changed; bool binary_logged= 0; killed_state killed_status= thd->killed; @@ -4527,7 +4616,7 @@ table->file->ha_rnd_end(); table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); - + table->file->extra(HA_EXTRA_ABORT_ALTER_COPY); /* If at least one row has been inserted/modified and will stay in the table (the table doesn't have transactions) we must write to @@ -4573,7 +4662,8 @@ query_cache_invalidate3(thd, table, 1); } DBUG_ASSERT(transactional_table || !changed || - thd->transaction->stmt.modified_non_trans_table); + (thd->transaction->stmt.modified_non_trans_table || + thd->transaction->all.modified_non_trans_table)); table->s->table_creation_was_logged|= binary_logged; table->file->ha_release_auto_increment(); @@ -5266,9 +5356,14 @@ /* Remember xid's for the case of row based logging */ ddl_log_update_xid(&ddl_log_state_create, thd->binlog_xid); ddl_log_update_xid(&ddl_log_state_rm, thd->binlog_xid); - trans_commit_stmt(thd); - if (!(thd->variables.option_bits & OPTION_GTID_BEGIN)) - trans_commit_implicit(thd); + if (trans_commit_stmt(thd) || + (!(thd->variables.option_bits & OPTION_GTID_BEGIN) && + trans_commit_implicit(thd))) + { + abort_result_set(); + DBUG_RETURN(true); + } + thd->binlog_xid= 0; #ifdef WITH_WSREP @@ -5388,7 +5483,13 @@ /* possible error of writing binary log is ignored deliberately */ (void) thd->binlog_flush_pending_rows_event(TRUE, TRUE); + /* + In the error case, we remove any partially created table. So clear any + incident event generated due to a cache error, as it is no longer relevant. + */ + binlog_clear_incident(thd); + bool drop_table_was_logged= false; if (table) { bool tmp_table= table->s->tmp_table; @@ -5435,6 +5536,7 @@ create_info->db_type == partition_hton, &create_info->tabledef_version, tmp_table); + drop_table_was_logged= true; debug_crash_here("ddl_log_create_after_binlog"); thd->binlog_xid= 0; } @@ -5459,8 +5561,21 @@ if (create_info->table_was_deleted) { - /* Unlock locked table that was dropped by CREATE. */ - (void) trans_rollback_stmt(thd); + if (drop_table_was_logged) + { + /* for DROP binlogging the error status has to be canceled first */ + Diagnostics_area new_stmt_da(thd->query_id, false, true); + Diagnostics_area *old_stmt_da= thd->get_stmt_da(); + + thd->set_stmt_da(&new_stmt_da); + (void) trans_rollback_stmt(thd); + thd->set_stmt_da(old_stmt_da); + } + else + { + /* Unlock locked table that was dropped by CREATE.
*/ + (void) trans_rollback_stmt(thd); + } thd->locked_tables_list.unlock_locked_table(thd, create_info->mdl_ticket); } diff -Nru mariadb-10.11.11/sql/sql_insert.h mariadb-10.11.13/sql/sql_insert.h --- mariadb-10.11.11/sql/sql_insert.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_insert.h 2025-05-19 16:14:25.000000000 +0000 @@ -28,7 +28,7 @@ List &update_fields, List &update_values, enum_duplicates duplic, bool ignore, - COND **where, bool select_insert); + COND **where, bool select_insert, bool * const cache_results); bool mysql_insert(THD *thd,TABLE_LIST *table,List &fields, List &values, List &update_fields, List &update_values, enum_duplicates flag, diff -Nru mariadb-10.11.11/sql/sql_lex.cc mariadb-10.11.13/sql/sql_lex.cc --- mariadb-10.11.11/sql/sql_lex.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_lex.cc 2025-05-19 16:14:25.000000000 +0000 @@ -11179,7 +11179,8 @@ Field_pair *get_corresponding_field_pair(Item *item, List pair_list) { - DBUG_ASSERT(item->type() == Item::FIELD_ITEM || + DBUG_ASSERT(item->type() == Item::DEFAULT_VALUE_ITEM || + item->type() == Item::FIELD_ITEM || (item->type() == Item::REF_ITEM && ((((Item_ref *) item)->ref_type() == Item_ref::VIEW_REF) || (((Item_ref *) item)->ref_type() == Item_ref::REF)))); @@ -12244,6 +12245,48 @@ false; } +/** + Find the real table in prepared SELECT tree + + NOTE: all SELECT must be prepared (to have leaf table list). + + NOTE: it looks only for real tables (not view or derived) + + @param thd the current thread handle + @param db_name name of db of the table to look for + @param db_name name of db of the table to look for + + @return first found table, NULL or ERROR_TABLE +*/ + +TABLE_LIST *SELECT_LEX::find_table(THD *thd, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name) +{ + uchar buff[STACK_BUFF_ALLOC]; // Max argument in function + if (check_stack_overrun(thd, STACK_MIN_SIZE, buff)) + return NULL; + + List_iterator_fast ti(leaf_tables); + TABLE_LIST *table; + while ((table= ti++)) + { + if (cmp(&table->db, db_name) == 0 && + cmp(&table->table_name, table_name) == 0) + return table; + } + + for (SELECT_LEX_UNIT *u= first_inner_unit(); u; u= u->next_unit()) + { + for (st_select_lex *sl= u->first_select(); sl; sl=sl->next_select()) + { + if ((table= sl->find_table(thd, db_name, table_name))) + return table; + } + } + return NULL; +} + bool st_select_lex::is_query_topmost(THD *thd) { diff -Nru mariadb-10.11.11/sql/sql_lex.h mariadb-10.11.13/sql/sql_lex.h --- mariadb-10.11.11/sql/sql_lex.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_lex.h 2025-05-19 16:14:25.000000000 +0000 @@ -1690,6 +1690,10 @@ void lex_start(LEX *plex); bool is_unit_nest() { return (nest_flags & UNIT_NEST_FL); } void mark_as_unit_nest() { nest_flags= UNIT_NEST_FL; } + + TABLE_LIST *find_table(THD *thd, + const LEX_CSTRING *db_name, + const LEX_CSTRING *table_name); }; typedef class st_select_lex SELECT_LEX; @@ -4681,7 +4685,7 @@ int add_period(Lex_ident name, Lex_ident_sys_st start, Lex_ident_sys_st end) { - if (check_period_name(name.str)) { + if (check_column_name(name)) { my_error(ER_WRONG_COLUMN_NAME, MYF(0), name.str); return 1; } diff -Nru mariadb-10.11.11/sql/sql_parse.cc mariadb-10.11.13/sql/sql_parse.cc --- mariadb-10.11.11/sql/sql_parse.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_parse.cc 2025-05-19 16:14:25.000000000 +0000 @@ -7296,18 +7296,9 @@ DBUG_PRINT("info", ("derived: %d view: %d", table_ref->derived != 0, table_ref->view != 0)); - if 
(table_ref->is_anonymous_derived_table()) + if (table_ref->is_anonymous_derived_table() || table_ref->sequence) continue; - if (table_ref->sequence) - { - /* We want to have either SELECT or INSERT rights to sequences depending - on how they are accessed - */ - want_access= ((table_ref->lock_type >= TL_FIRST_WRITE) ? - INSERT_ACL : SELECT_ACL); - } - if (check_access(thd, want_access, table_ref->get_db_name().str, &table_ref->grant.privilege, &table_ref->grant.m_internal, @@ -10389,7 +10380,13 @@ bool check_ident_length(const LEX_CSTRING *ident) { - if (check_string_char_length(ident, 0, NAME_CHAR_LEN, system_charset_info, 1)) + /* + check_string_char_length(), despite its name, goes through + Well_formed_prefix_status, so this is more than just a length comparison. + Some identifiers legitimately have no name and thus zero length, e.g. an + unnamed primary key, and the ident grammar also allows an empty backtick + pair. Check the length first, and if it is 0, accept the identifier. + */ + if (ident->length && check_string_char_length(ident, 0, NAME_CHAR_LEN, system_charset_info, 1)) { my_error(ER_TOO_LONG_IDENT, MYF(0), ident->str); return 1; } diff -Nru mariadb-10.11.11/sql/sql_prepare.cc mariadb-10.11.13/sql/sql_prepare.cc --- mariadb-10.11.11/sql/sql_prepare.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_prepare.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1304,6 +1304,7 @@ THD *thd= stmt->thd; List_iterator_fast<List_item> its(values_list); List_item *values; + bool cache_results= FALSE; DBUG_ENTER("mysql_test_insert_common"); if (insert_precheck(thd, table_list)) @@ -1336,7 +1337,8 @@ if (mysql_prepare_insert(thd, table_list, fields, values, update_fields, update_values, duplic, ignore, - &unused_conds, FALSE) + &unused_conds, FALSE, + &cache_results)) goto error; value_count= values->elements; diff -Nru mariadb-10.11.11/sql/sql_priv.h mariadb-10.11.13/sql/sql_priv.h --- mariadb-10.11.11/sql/sql_priv.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_priv.h 2025-05-19 16:14:25.000000000 +0000 @@ -281,6 +281,7 @@ #define OPTIMIZER_FIX_INNODB_CARDINALITY (8) #define OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF (16) #define OPTIMIZER_ADJ_FIX_CARD_MULT (32) +#define OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST (64) #define OPTIMIZER_ADJ_DEFAULT (OPTIMIZER_ADJ_FIX_REUSE_RANGE_FOR_REF | \ OPTIMIZER_ADJ_FIX_CARD_MULT) diff -Nru mariadb-10.11.11/sql/sql_reload.cc mariadb-10.11.13/sql/sql_reload.cc --- mariadb-10.11.11/sql/sql_reload.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_reload.cc 2025-05-19 16:14:25.000000000 +0000 @@ -618,7 +618,7 @@ if (table_list->belong_to_view && check_single_table_access(thd, PRIV_LOCK_TABLES, table_list, FALSE)) { - table_list->hide_view_error(thd); + table_list->replace_view_error_with_generic(thd); goto error_reset_bits; } if (table_list->is_view_or_derived()) diff -Nru mariadb-10.11.11/sql/sql_select.cc mariadb-10.11.13/sql/sql_select.cc --- mariadb-10.11.11/sql/sql_select.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_select.cc 2025-05-19 16:14:25.000000000 +0000 @@ -3581,7 +3581,14 @@ continue; Item *item= new (thd->mem_root) Item_temptable_rowid(tab->table); item->fix_fields(thd, 0); - table_fields->push_back(item, thd->mem_root); + /* + table_fields points to JOIN::all_fields or JOIN::tmp_all_fields_*. + These lists start with "added" fields and then their suffix is shared + with JOIN::fields_list or JOIN::tmp_fields_list*. + Because of that, new elements can only be added to the front of the list, + not to the back.
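    To see why, here is a minimal illustration (hypothetical types, not the
    server's List<> implementation): two list heads can share their tail
    nodes, so prepending only re-points one head, while appending through
    one head would splice the new node into the other list as well.

      struct Node { int val; Node *next; };

      static Node c{3, nullptr}, b{2, &c}, a{1, &b};
      static Node *all_fields= &a;    // "added" items plus shared suffix b->c
      static Node *fields_list= &b;   // shares nodes b and c with all_fields

      static Node added{0, &a};
      // Safe: all_fields= &added moves one head only; fields_list still
      // sees exactly b->c. Appending a node after c instead would make it
      // visible through BOTH heads, corrupting fields_list.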
+ */ + table_fields->push_front(item, thd->mem_root); cur->tmp_table_param->func_count++; } return 0; @@ -5994,7 +6001,10 @@ s->table->opt_range_condition_rows=s->records; } else + { + /* Update s->records and s->read_time */ s->scan_time(); + } if (s->table->is_splittable()) s->add_keyuses_for_splitting(); @@ -14049,6 +14059,36 @@ } +/* + Procedure of keys generation for result tables of materialized derived + tables/views. + + A key is generated for each equi-join pair {derived_table, some_other_table}. + Each generated key consists of fields of derived table used in equi-join. + Example: + + SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN + t1 ON tt.f1=t1.f3 and tt.f2.=t1.f4; + In this case for the derived table tt one key will be generated. It will + consist of two parts f1 and f2. + Example: + + SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN + t1 ON tt.f1=t1.f3 JOIN + t2 ON tt.f2=t2.f4; + In this case for the derived table tt two keys will be generated. + One key over f1 field, and another key over f2 field. + Currently optimizer may choose to use only one such key, thus the second + one will be dropped after range optimizer is finished. + See also JOIN::drop_unused_derived_keys function. + Example: + + SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN + t1 ON tt.f1=a_function(t1.f3); + In this case for the derived table tt one key will be generated. It will + consist of one field - f1. +*/ + static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array) { @@ -14759,7 +14799,7 @@ } goto no_join_cache; } - if (cache_level > 4 && no_bka_cache) + if (cache_level < 5 || no_bka_cache) goto no_join_cache; if ((flags & HA_MRR_NO_ASSOCIATION) && @@ -15461,6 +15501,7 @@ double JOIN_TAB::scan_time() { double res; + THD *thd= join->thd; if (table->is_created()) { if (table->is_filled_at_execution()) @@ -15481,10 +15522,53 @@ } res= read_time; } - else + else if (!(thd->variables.optimizer_adjust_secondary_key_costs & + OPTIMIZER_ADJ_FIX_DERIVED_TABLE_READ_COST)) { + /* + Old code, do not merge into 11.0+: + */ found_records= records=table->stat_records(); - read_time= found_records ? (double)found_records: 10.0;// TODO:fix this stub + read_time= found_records ? (double)found_records: 10.0; + res= read_time; + } + else + { + bool using_heap= 0; + TABLE_SHARE *share= table->s; + found_records= records= table->stat_records(); + + if (share->db_type() == heap_hton) + { + /* Check that the rows will fit into the heap table */ + ha_rows max_rows; + max_rows= (ha_rows) ((MY_MIN(thd->variables.tmp_memory_table_size, + thd->variables.max_heap_table_size)) / + MY_ALIGN(share->reclength, sizeof(char*))); + if (records <= max_rows) + { + /* The rows will fit into the heap table */ + using_heap= 1; + } + } + + /* + Code for the following is taken from the heap and aria storage engine. 
+ In 11.# this is done without explicit engine code + */ + if (using_heap) + read_time= (records / 20.0) + 1; + else + { + handler *file= table->file; + file->stats.data_file_length= share->reclength * records; + /* + Call the default scan_time() method as this is the cost for the + scan when heap is converted to Aria + */ + read_time= file->handler::scan_time(); + file->stats.data_file_length= 0; + } res= read_time; } return res; @@ -18544,6 +18628,8 @@ prev_table->dep_tables|= used_tables; if (prev_table->on_expr) { + /* If the ON expression is still there, it's an outer join */ + DBUG_ASSERT(prev_table->outer_join); prev_table->dep_tables|= table->on_expr_dep_tables; table_map prev_used_tables= prev_table->nested_join ? prev_table->nested_join->used_tables : @@ -18558,11 +18644,59 @@ prevents update of inner table dependences. For example it might happen if RAND() function is used in JOIN ON clause. - */ - if (!((prev_table->on_expr->used_tables() & - ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) & - ~prev_used_tables)) + */ + table_map prev_on_expr_deps= prev_table->on_expr->used_tables() & + ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); + prev_on_expr_deps&= ~prev_used_tables; + + if (!prev_on_expr_deps) prev_table->dep_tables|= used_tables; + else + { + /* + Another possible case is when prev_on_expr_deps!=0 but it depends + on a table outside this join nest. SQL name resolution doesn't allow + this but it is possible when LEFT JOIN is inside a subquery which + is converted into a semi-join nest. Example: + + t1 SEMI JOIN ( + t2 + LEFT JOIN (t3 LEFT JOIN t4 ON t4.col=t1.col) ON expr + ) ON ... + + here, we would have prev_table=t4, table=t3. The condition + "ON t4.col=t1.col" depends on tables {t1, t4}. To make sure the + optimizer puts t3 before t4 we need to make sure t4.dep_tables + includes t3. + */ + + DBUG_ASSERT(table->embedding == prev_table->embedding); + if (table->embedding) + { + /* + Find what are the "peers" of "table" in the join nest. Normally, + it is table->embedding->nested_join->used_tables, but here we are + in the process of recomputing that value. + So, we walk the join list and collect the bitmap of peers: + */ + table_map peers= 0; + List_iterator_fast<TABLE_LIST> li(*join_list); + TABLE_LIST *peer; + while ((peer= li++)) + { + table_map curmap= peer->nested_join + ? peer->nested_join->used_tables + : peer->get_map(); + peers|= curmap; + } + /* + If prev_table doesn't depend on any of its peers, add a + dependency on nearest peer, that is, on 'table'. + */ + if (!(prev_on_expr_deps & peers)) + prev_table->dep_tables|= used_tables; + } + } } } prev_table= table; @@ -22354,6 +22488,8 @@ */ clear_tables(join, &cleared_tables); } + if (join->tmp_table_param.copy_funcs.elements) + copy_fields(&join->tmp_table_param); if (!join->having || join->having->val_bool()) { List<Item> *columns_list= (procedure ? &join->procedure_fields_list : @@ -27021,9 +27157,13 @@ original field name, we should additionally check if we have conflict for this name (in case if we would perform lookup in all tables). */ - if (resolution == RESOLVED_BEHIND_ALIAS && - order_item->fix_fields_if_needed_for_order_by(thd, order->item)) - return TRUE; + if (resolution == RESOLVED_BEHIND_ALIAS) + { + if (order_item->fix_fields_if_needed_for_order_by(thd, order->item)) + return TRUE; + // fix_fields may have replaced order->item, reset local variable. + order_item= *order->item; + } /* Lookup the current GROUP field in the FROM clause.
*/ order_item_type= order_item->type(); @@ -30489,7 +30629,7 @@ */ if (top_level || item->is_explicit_name() || - !check_column_name(item->name.str)) + !check_column_name(item->name)) item->print_item_w_name(str, query_type); else item->print(str, query_type); diff -Nru mariadb-10.11.11/sql/sql_show.cc mariadb-10.11.13/sql/sql_show.cc --- mariadb-10.11.11/sql/sql_show.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_show.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1435,7 +1435,14 @@ DBUG_RETURN(TRUE); } - load_db_opt_by_name(thd, dbname->str, &create); + if (load_db_opt_by_name(thd, dbname->str, &create) < 0) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_UNKNOWN_ERROR, + "Database '%.192s' does not have a db.opt file. " + "You can create one with ALTER DATABASE if needed", + dbname->str); + } } mysqld_show_create_db_get_fields(thd, &field_list); @@ -2943,25 +2950,27 @@ while (thread_info *thd_info= arg.thread_infos.get()) { + const char *str; + ulonglong start_time; + CSET_STRING query; + protocol->prepare_for_resend(); protocol->store(thd_info->thread_id); protocol->store(thd_info->user, strlen(thd_info->user), system_charset_info); protocol->store(thd_info->host, strlen(thd_info->host), system_charset_info); protocol->store_string_or_null(thd_info->db, system_charset_info); - if (thd_info->proc_info) - protocol->store(thd_info->proc_info, strlen(thd_info->proc_info), - system_charset_info); + if ((str= thd_info->proc_info)) + protocol->store(str, strlen(str), system_charset_info); else protocol->store(&command_name[thd_info->command], system_charset_info); - if (thd_info->start_time && now > thd_info->start_time) - protocol->store_long((now - thd_info->start_time) / HRTIME_RESOLUTION); + if ((start_time= thd_info->start_time) && now > start_time) + protocol->store_long((now - start_time) / HRTIME_RESOLUTION); else protocol->store_null(); protocol->store_string_or_null(thd_info->state_info, system_charset_info); - if (thd_info->query_string.length()) - protocol->store(thd_info->query_string.str(), - thd_info->query_string.length(), - thd_info->query_string.charset()); + query= thd_info->query_string; + if (query.length() && query.str()) + protocol->store(query.str(), query.length(), query.charset()); else protocol->store_null(); if (!(thd->variables.old_behavior & OLD_MODE_NO_PROGRESS_INFO)) @@ -4339,7 +4348,7 @@ break; } - if (lower_case_table_names && !rc) + if (lower_case_table_names == 1 && !rc) { /* We can safely do in-place upgrades here since all of the above cases diff -Nru mariadb-10.11.11/sql/sql_statistics.cc mariadb-10.11.13/sql/sql_statistics.cc --- mariadb-10.11.11/sql/sql_statistics.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_statistics.cc 2025-05-19 16:14:25.000000000 +0000 @@ -2077,12 +2077,9 @@ for (i= 0, state= calc_state; i < prefixes; i++, state++) { - if (i < prefixes) - { - double val= state->prefix_count == 0 ? - 0 : (double) state->entry_count / state->prefix_count; - index_info->collected_stats->set_avg_frequency(i, val); - } + double val= state->prefix_count == 0 ? + 0 : (double) state->entry_count / state->prefix_count; + index_info->collected_stats->set_avg_frequency(i, val); } } }; @@ -3142,7 +3139,7 @@ double avg_frequency= pk_read_stats->get_avg_frequency(j-1); set_if_smaller(avg_frequency, 1); double val= (pk_read_stats->get_avg_frequency(j) / - avg_frequency); + avg_frequency > 0 ? 
avg_frequency : 1); index_statistics->set_avg_frequency (l, val); } } diff -Nru mariadb-10.11.11/sql/sql_string.h mariadb-10.11.13/sql/sql_string.h --- mariadb-10.11.11/sql/sql_string.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_string.h 2025-05-19 16:14:25.000000000 +0000 @@ -909,6 +909,8 @@ :Charset(cs), Binary_string(str, len) { } String(const String &str) = default; + String(String &&str) noexcept + :Charset(std::move(str)), Binary_string(std::move(str)){} void set(String &str,size_t offset,size_t arg_length) { diff -Nru mariadb-10.11.11/sql/sql_table.cc mariadb-10.11.13/sql/sql_table.cc --- mariadb-10.11.11/sql/sql_table.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_table.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1587,12 +1587,19 @@ else { #ifdef WITH_WSREP - if (WSREP(thd) && hton && !wsrep_should_replicate_ddl(thd, hton)) + if (WSREP(thd) && hton) { - error= 1; - goto err; + handlerton *ht= hton; + // For partitioned tables resolve underlying handlerton + if (table->table && table->table->file->partition_ht()) + ht= table->table->file->partition_ht(); + if (!wsrep_should_replicate_ddl(thd, ht)) + { + error= 1; + goto err; + } } -#endif +#endif /* WITH_WSREP */ if (thd->locked_tables_mode == LTM_LOCK_TABLES || thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES) @@ -1863,18 +1870,6 @@ if (non_temp_tables_count) query_cache_invalidate3(thd, tables, 0); - /* - We are always logging drop of temporary tables. - The reason is to handle the following case: - - Use statement based replication - - CREATE TEMPORARY TABLE foo (logged) - - set row based replication - - DROP TEMPORARY TABLE foo (needs to be logged) - This should be fixed so that we remember if creation of the - temporary table was logged and only log it if the creation was - logged. - */ - if (non_trans_tmp_table_deleted || trans_tmp_table_deleted || non_tmp_table_deleted) { @@ -3112,7 +3107,7 @@ DBUG_ASSERT(sql_field->charset); - if (check_column_name(sql_field->field_name.str)) + if (check_column_name(sql_field->field_name)) { my_error(ER_WRONG_COLUMN_NAME, MYF(0), sql_field->field_name.str); DBUG_RETURN(TRUE); @@ -3750,7 +3745,7 @@ key_part_info++; } - if (!key_info->name.str || check_column_name(key_info->name.str)) + if (!key_info->name.str || check_column_name(key_info->name)) { my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key_info->name.str); DBUG_RETURN(TRUE); @@ -4989,9 +4984,26 @@ // In Galera cluster we support only InnoDB sequences if (db_type != DB_TYPE_INNODB) { - my_error(ER_NOT_SUPPORTED_YET, MYF(0), - "non-InnoDB sequences in Galera cluster"); - return(true); + // Currently any dynamic storage engine is not possible to identify + // using DB_TYPE_XXXX and ENGINE=SEQUENCE is one of them. + // Therefore, we get storage engine name from lex. + const LEX_CSTRING *tb_name= thd->lex->m_sql_cmd->option_storage_engine_name()->name(); + // (1) CREATE TABLE ... ENGINE=SEQUENCE OR + // (2) ALTER TABLE ... ENGINE= OR + // Note in ALTER TABLE table->s->sequence != nullptr + // (3) CREATE SEQUENCE ... 
ENGINE= + if ((thd->lex->sql_command == SQLCOM_CREATE_TABLE && + lex_string_eq(tb_name, STRING_WITH_LEN("SEQUENCE"))) || + (thd->lex->sql_command == SQLCOM_ALTER_TABLE) || + (thd->lex->sql_command == SQLCOM_CREATE_SEQUENCE)) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "non-InnoDB sequences in Galera cluster"); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE, + ER_NOT_SUPPORTED_YET, + "ENGINE=%s not supported by Galera", tb_name->str); + return(true); + } } // In Galera cluster it is best to use INCREMENT BY 0 with CACHE @@ -6223,7 +6235,7 @@ } else if (drop->type == Alter_drop::PERIOD) { - if (table->s->period.name.streq(drop->name)) + if (table->s->period.name.streq(Lex_ident(drop->name))) remove_drop= FALSE; } else /* Alter_drop::KEY and Alter_drop::FOREIGN_KEY */ @@ -9215,7 +9227,7 @@ for (bool found= false; !found && (drop= drop_it++); ) { found= drop->type == Alter_drop::PERIOD && - table->s->period.name.streq(drop->name); + table->s->period.name.streq(Lex_ident(drop->name)); } if (drop) @@ -9258,7 +9270,7 @@ } } - if (share->period.constr_name.streq(check->name.str)) + if (share->period.constr_name.streq(check->name)) { if (!drop_period && !keep) { @@ -10514,10 +10526,21 @@ if (WSREP(thd) && table && (thd->lex->sql_command == SQLCOM_ALTER_TABLE || thd->lex->sql_command == SQLCOM_CREATE_INDEX || - thd->lex->sql_command == SQLCOM_DROP_INDEX) && - !wsrep_should_replicate_ddl(thd, table->s->db_type())) - DBUG_RETURN(true); -#endif /* WITH_WSREP */ + thd->lex->sql_command == SQLCOM_DROP_INDEX)) + { + handlerton *ht= table->s->db_type(); + + // If alter used ENGINE= we use that + if (create_info->used_fields & HA_CREATE_USED_ENGINE) + ht= create_info->db_type; + // For partitioned tables resolve underlying handlerton + else if (table->file->partition_ht()) + ht= table->file->partition_ht(); + + if (!wsrep_should_replicate_ddl(thd, ht)) + DBUG_RETURN(true); + } +#endif DEBUG_SYNC(thd, "alter_table_after_open_tables"); @@ -11609,7 +11632,8 @@ - Neither old or new engine uses files from another engine The above is mainly true for the sequence and the partition engine. */ - engine_changed= ((new_table->file->ht != table->file->ht) && + engine_changed= ((new_table->file->storage_ht() != + table->file->storage_ht()) && ((!(new_table->file->ha_table_flags() & HA_FILE_BASED) || !(table->file->ha_table_flags() & HA_FILE_BASED))) && !(table->file->ha_table_flags() & HA_REUSES_FILE_NAMES) && @@ -11644,7 +11668,7 @@ debug_crash_here("ddl_log_alter_after_copy"); // Use old table /* - We are new ready to use the new table. Update the state in the + We are now ready to use the new table. Update the state in the ddl log so that we recovery know that the new table is ready and in case of crash it should use the new one and log the query to the binary log. @@ -12354,6 +12378,7 @@ if (alt_error > 0) { error= alt_error; + to->file->extra(HA_EXTRA_ABORT_ALTER_COPY); copy_data_error_ignore(error, false, to, thd, alter_ctx); } } diff -Nru mariadb-10.11.11/sql/sql_trigger.cc mariadb-10.11.13/sql/sql_trigger.cc --- mariadb-10.11.11/sql/sql_trigger.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_trigger.cc 2025-05-19 16:14:25.000000000 +0000 @@ -622,7 +622,12 @@ table= tables->table; #ifdef WITH_WSREP - if (WSREP(thd) && !wsrep_should_replicate_ddl(thd, table->s->db_type())) + /* Resolve should we replicate creation of the trigger. + It should be replicated if storage engine(s) associated + to trigger are replicated by Galera. 
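    A sketch of the intended iteration, with hypothetical types standing in
    for TABLE_LIST and handlerton (the real implementation is in
    wsrep_mysqld.cc further down in this diff): each base table behind the
    trigger is checked, and a partitioned table is first resolved to the
    engine that implements its partitions.

      struct handlerton;
      struct Table { const handlerton *partition_ht, *db_type; };
      struct TableRef { const Table *table; const TableRef *next_global; };

      bool should_replicate(const handlerton *ht);  // engine-level check

      static bool should_replicate_all(const TableRef *list)
      {
        for (const TableRef *t= list; t; t= t->next_global)
        {
          if (!t->table)
            continue;                 // views etc. have no base table here
          const handlerton *ht= t->table->partition_ht
                                ? t->table->partition_ht
                                : t->table->db_type;
          if (!should_replicate(ht))  // one non-replicated engine refuses
            return false;             // the whole DDL statement
        }
        return true;
      }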
+ */ + if (WSREP(thd) && + !wsrep_should_replicate_ddl_iterate(thd, tables)) goto end; #endif diff -Nru mariadb-10.11.11/sql/sql_truncate.cc mariadb-10.11.13/sql/sql_truncate.cc --- mariadb-10.11.11/sql/sql_truncate.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_truncate.cc 2025-05-19 16:14:25.000000000 +0000 @@ -303,7 +303,7 @@ bool Sql_cmd_truncate_table::lock_table(THD *thd, TABLE_LIST *table_ref, bool *hton_can_recreate) { - handlerton *hton; + const handlerton *hton; bool versioned; bool sequence= false; TABLE *table= NULL; @@ -336,8 +336,15 @@ versioned= table->versioned(); hton= table->file->ht; #ifdef WITH_WSREP + /* Resolve should we replicate truncate. It should + be replicated if storage engine(s) associated + are replicated by Galera. If this is partitioned + table we need to find out default partition + handlerton. + */ if (WSREP(thd) && - !wsrep_should_replicate_ddl(thd, hton)) + !wsrep_should_replicate_ddl(thd, table->file->partition_ht() ? + table->file->partition_ht() : hton)) DBUG_RETURN(TRUE); #endif @@ -359,12 +366,26 @@ sequence= share->table_type == TABLE_TYPE_SEQUENCE; hton= share->db_type(); #ifdef WITH_WSREP - if (WSREP(thd) && - hton != view_pseudo_hton && - !wsrep_should_replicate_ddl(thd, hton)) + if (WSREP(thd) && hton != view_pseudo_hton) { - tdc_release_share(share); - DBUG_RETURN(TRUE); + /* Resolve should we replicate truncate. It should + be replicated if storage engine(s) associated + are replicated by Galera. If this is partitioned + table we need to find out default partition + handlerton. + */ + const handlerton* const ht= +#ifdef WITH_PARTITION_STORAGE_ENGINE + share->default_part_plugin ? + plugin_hton(share->default_part_plugin) : +#endif + hton; + + if (ht && !wsrep_should_replicate_ddl(thd, ht)) + { + tdc_release_share(share); + DBUG_RETURN(TRUE); + } } #endif diff -Nru mariadb-10.11.11/sql/sql_update.cc mariadb-10.11.13/sql/sql_update.cc --- mariadb-10.11.11/sql/sql_update.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_update.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1704,7 +1704,7 @@ if (multi_update_check_table_access(thd, tbl, tables_for_update, &updated)) { - tbl->hide_view_error(thd); + tbl->replace_view_error_with_generic(thd); return true; } } @@ -2356,7 +2356,8 @@ if (unlikely((thd->variables.option_bits & OPTION_SAFE_UPDATES) && error_if_full_join(join))) DBUG_RETURN(1); - if (join->implicit_grouping) + if (join->implicit_grouping || + join->select_lex->have_window_funcs()) { my_error(ER_INVALID_GROUP_FUNC_USE, MYF(0)); DBUG_RETURN(1); diff -Nru mariadb-10.11.11/sql/sql_view.cc mariadb-10.11.13/sql/sql_view.cc --- mariadb-10.11.11/sql/sql_view.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_view.cc 2025-05-19 16:14:25.000000000 +0000 @@ -183,7 +183,7 @@ for (uint column_no= 1; (item= it++); column_no++) { - if (item->is_explicit_name() || !check_column_name(item->name.str)) + if (item->is_explicit_name() || !check_column_name(item->name)) continue; name_len= my_snprintf(buff, NAME_LEN, "Name_exp_%u", column_no); item->orig_name= item->name.str; @@ -341,7 +341,7 @@ { if (check_single_table_access(thd, SELECT_ACL, tbl, FALSE)) { - tbl->hide_view_error(thd); + tbl->replace_view_error_with_generic(thd); goto err; } } @@ -452,8 +452,6 @@ lex->link_first_table_back(view, link_to_local); view->open_type= OT_BASE_ONLY; - WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); - /* ignore lock specs for CREATE statement */ @@ -471,13 +469,20 @@ } #ifdef WITH_WSREP - 
if(!wsrep_should_replicate_ddl_iterate(thd, static_cast(tables))) + /* Resolve should we replicate creation of the view. + It should be replicated if storage engine(s) associated + to view are replicated by Galera. + */ + if (WSREP(thd) && + !wsrep_should_replicate_ddl_iterate(thd, tables)) { res= TRUE; goto err_no_relink; } #endif + WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL); + view= lex->unlink_first_table(&link_to_local); if (check_db_dir_existence(view->db.str)) diff -Nru mariadb-10.11.11/sql/sql_yacc.yy mariadb-10.11.13/sql/sql_yacc.yy --- mariadb-10.11.11/sql/sql_yacc.yy 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sql_yacc.yy 2025-05-19 16:14:25.000000000 +0000 @@ -9107,7 +9107,7 @@ if ($4.str) { if (unlikely(Lex->sql_command == SQLCOM_CREATE_VIEW && - check_column_name($4.str))) + check_column_name($4))) my_yyabort_error((ER_WRONG_COLUMN_NAME, MYF(0), $4.str)); $2->base_flags|= item_base_t::IS_EXPLICIT_NAME; $2->set_name(thd, $4); diff -Nru mariadb-10.11.11/sql/structs.h mariadb-10.11.13/sql/structs.h --- mariadb-10.11.11/sql/structs.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/structs.h 2025-05-19 16:14:25.000000000 +0000 @@ -236,7 +236,7 @@ LEX_CSTRING user, host; void init() { memset(this, 0, sizeof(*this)); } void copy(MEM_ROOT *root, const LEX_CSTRING *usr, const LEX_CSTRING *host); - bool is_role() const { return user.str[0] && !host.str[0]; } + bool is_role() const { return user.str[0] && (!host.str || !host.str[0]); } void set_lex_string(LEX_CSTRING *l, char *buf) { if (is_role()) diff -Nru mariadb-10.11.11/sql/sys_vars.cc mariadb-10.11.13/sql/sys_vars.cc --- mariadb-10.11.11/sql/sys_vars.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/sys_vars.cc 2025-05-19 16:14:25.000000000 +0000 @@ -2982,7 +2982,7 @@ { "adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", "fix_innodb_cardinality", "fix_reuse_range_for_ref", - "fix_card_multiplier", 0 + "fix_card_multiplier", "fix_derived_table_read_cost", 0 }; @@ -2999,8 +2999,9 @@ "secondary keys. " "fix_reuse_range_for_ref = Do a better job at reusing range access estimates " "when estimating ref access. " - "fix_card_multiplier = Fix the computation in selectivity_for_indexes." - " selectivity_multiplier. " + "fix_card_multiplier = Fix the computation in selectivity_for_indexes. " + "fix_derived_table_read_cost = Fix the cost of reading materialized " + "derived table. 
" "This variable will be deleted in MariaDB 11.0 as it is not needed with the " "new 11.0 optimizer.", @@ -6309,7 +6310,9 @@ static Sys_var_enum Sys_wsrep_forced_binlog_format( "wsrep_forced_binlog_format", "binlog format to take effect over user's choice", GLOBAL_VAR(wsrep_forced_binlog_format), CMD_LINE(REQUIRED_ARG), - wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC)); + wsrep_binlog_format_names, DEFAULT(BINLOG_FORMAT_UNSPEC), + NO_MUTEX_GUARD, NOT_IN_BINLOG, + ON_CHECK(wsrep_forced_binlog_format_check)); static Sys_var_mybool Sys_wsrep_recover_datadir( "wsrep_recover", "Recover database state after crash and exit", diff -Nru mariadb-10.11.11/sql/table.cc mariadb-10.11.13/sql/table.cc --- mariadb-10.11.11/sql/table.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/table.cc 2025-05-19 16:14:25.000000000 +0000 @@ -5320,9 +5320,10 @@ } -bool check_column_name(const char *name) +bool check_column_name(const Lex_ident &ident) { // name length in symbols + const char *name= ident.str, *end= ident.str + ident.length; size_t name_length= 0; bool last_char_is_space= TRUE; @@ -5332,9 +5333,7 @@ last_char_is_space= my_isspace(system_charset_info, *name); if (system_charset_info->use_mb()) { - int len=my_ismbchar(system_charset_info, name, - name+system_charset_info->mbmaxlen); - if (len) + if (int len= my_ismbchar(system_charset_info, name, end)) { name += len; name_length++; @@ -5354,12 +5353,6 @@ } -bool check_period_name(const char *name) -{ - return check_column_name(name); -} - - /** Checks whether a table is intact. Should be done *just* after the table has been opened. @@ -6360,9 +6353,9 @@ @pre This method can be called only if there is an error. */ -void TABLE_LIST::hide_view_error(THD *thd) +void TABLE_LIST::replace_view_error_with_generic(THD *thd) { - if ((thd->killed && !thd->is_error())|| thd->get_internal_handler()) + if ((thd->killed && !thd->is_error()) || thd->get_internal_handler()) return; /* Hide "Unknown column" or "Unknown function" error */ DBUG_ASSERT(thd->is_error()); @@ -9956,37 +9949,6 @@ return error; } -/* - Procedure of keys generation for result tables of materialized derived - tables/views. - - A key is generated for each equi-join pair derived table-another table. - Each generated key consists of fields of derived table used in equi-join. - Example: - - SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN - t1 ON tt.f1=t1.f3 and tt.f2.=t1.f4; - In this case for the derived table tt one key will be generated. It will - consist of two parts f1 and f2. - Example: - - SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN - t1 ON tt.f1=t1.f3 JOIN - t2 ON tt.f2=t2.f4; - In this case for the derived table tt two keys will be generated. - One key over f1 field, and another key over f2 field. - Currently optimizer may choose to use only one such key, thus the second - one will be dropped after range optimizer is finished. - See also JOIN::drop_unused_derived_keys function. - Example: - - SELECT * FROM (SELECT * FROM t1 GROUP BY 1) tt JOIN - t1 ON tt.f1=a_function(t1.f3); - In this case for the derived table tt one key will be generated. It will - consist of one field - f1. 
-*/ - - /* @brief diff -Nru mariadb-10.11.11/sql/table.h mariadb-10.11.13/sql/table.h --- mariadb-10.11.11/sql/table.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/table.h 2025-05-19 16:14:25.000000000 +0000 @@ -2192,7 +2192,7 @@ void init(vers_system_time_t _type, Vers_history_point _start= Vers_history_point(), Vers_history_point _end= Vers_history_point(), - Lex_ident _name= "SYSTEM_TIME") + Lex_ident _name= { STRING_WITH_LEN("SYSTEM_TIME") }) { type= _type; orig_type= _type; @@ -2207,7 +2207,7 @@ void set_all() { type= SYSTEM_TIME_ALL; - name= "SYSTEM_TIME"; + name= { STRING_WITH_LEN("SYSTEM_TIME") }; } void print(String *str, enum_query_type query_type) const; @@ -2572,7 +2572,7 @@ List *view_tables; /* most upper view this table belongs to */ TABLE_LIST *belong_to_view; - /* A derived table this table belongs to */ + /* A merged derived table this table belongs to */ TABLE_LIST *belong_to_derived; /* The view directly referencing this table @@ -2830,7 +2830,7 @@ bool check_single_table(TABLE_LIST **table, table_map map, TABLE_LIST *view); bool set_insert_values(MEM_ROOT *mem_root); - void hide_view_error(THD *thd); + void replace_view_error_with_generic(THD *thd); TABLE_LIST *find_underlying_table(TABLE *table); TABLE_LIST *first_leaf_for_name_resolution(); TABLE_LIST *last_leaf_for_name_resolution(); @@ -3078,6 +3078,8 @@ ulonglong m_table_ref_version; }; +#define ERROR_TABLE ((TABLE_LIST*) 0x1) + class Item; /* @@ -3388,8 +3390,7 @@ int db_errno); void update_create_info_from_table(HA_CREATE_INFO *info, TABLE *form); bool check_db_name(LEX_STRING *db); -bool check_column_name(const char *name); -bool check_period_name(const char *name); +bool check_column_name(const Lex_ident &name); bool check_table_name(const char *name, size_t length, bool check_for_path_chars); int rename_file_ext(const char * from,const char * to,const char * ext); char *get_field(MEM_ROOT *mem, Field *field); diff -Nru mariadb-10.11.11/sql/vers_string.h mariadb-10.11.13/sql/vers_string.h --- mariadb-10.11.11/sql/vers_string.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/vers_string.h 2025-05-19 16:14:25.000000000 +0000 @@ -62,7 +62,7 @@ { } Lex_cstring_with_compare(const LEX_CSTRING src) : Lex_cstring(src.str, src.length) { } - Lex_cstring_with_compare(const char *_str) : Lex_cstring(_str, strlen(_str)) + explicit Lex_cstring_with_compare(const char *_str) : Lex_cstring(_str, strlen(_str)) { } bool streq(const Lex_cstring_with_compare& b) const { diff -Nru mariadb-10.11.11/sql/wsrep_applier.cc mariadb-10.11.13/sql/wsrep_applier.cc --- mariadb-10.11.11/sql/wsrep_applier.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_applier.cc 2025-05-19 16:14:25.000000000 +0000 @@ -203,6 +203,21 @@ } } + if (LOG_EVENT_IS_WRITE_ROW(typ) || + LOG_EVENT_IS_UPDATE_ROW(typ) || + LOG_EVENT_IS_DELETE_ROW(typ)) + { + Rows_log_event* rle = static_cast(ev); + if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) + { + rle->set_flags(Rows_log_event::RELAXED_UNIQUE_CHECKS_F); + } + if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) + { + rle->set_flags(Rows_log_event::NO_FOREIGN_KEY_CHECKS_F); + } + } + /* Use the original server id for logging. 
*/ thd->set_server_id(ev->server_id); thd->lex->current_select= 0; diff -Nru mariadb-10.11.11/sql/wsrep_client_service.cc mariadb-10.11.13/sql/wsrep_client_service.cc --- mariadb-10.11.11/sql/wsrep_client_service.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_client_service.cc 2025-05-19 16:14:25.000000000 +0000 @@ -304,6 +304,12 @@ replayer_service.replay_status(ret); } + // In Galera we allow only InnoDB sequences, thus + // sequence table updates are in writeset. + // Binlog cache needs reset so that binlog_close + // does not write cache to binlog file yet. + binlog_reset_cache(m_thd); + replayer_thd->main_security_ctx = old_ctx; delete replayer_thd; DBUG_RETURN(ret); diff -Nru mariadb-10.11.11/sql/wsrep_high_priority_service.cc mariadb-10.11.13/sql/wsrep_high_priority_service.cc --- mariadb-10.11.11/sql/wsrep_high_priority_service.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_high_priority_service.cc 2025-05-19 16:14:25.000000000 +0000 @@ -610,7 +610,7 @@ int ret= apply_events(thd, m_rli, data, err, true); thd->close_temporary_tables(); - if (!ret && !(ws_meta.flags() & wsrep::provider::flag::commit)) + if (!ret && !wsrep::commits_transaction(ws_meta.flags())) { thd->wsrep_cs().fragment_applied(ws_meta.seqno()); } @@ -778,7 +778,7 @@ } ret= ret || apply_events(thd, m_rli, data, err, true); thd->close_temporary_tables(); - if (!ret && !(ws_meta.flags() & wsrep::provider::flag::commit)) + if (!ret && !wsrep::commits_transaction(ws_meta.flags())) { thd->wsrep_cs().fragment_applied(ws_meta.seqno()); } diff -Nru mariadb-10.11.11/sql/wsrep_mysqld.cc mariadb-10.11.13/sql/wsrep_mysqld.cc --- mariadb-10.11.11/sql/wsrep_mysqld.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_mysqld.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1,5 +1,5 @@ -/* Copyright (c) 2008, 2023 Codership Oy - Copyright (c) 2020, 2022, MariaDB +/* Copyright (c) 2008, 2025, Codership Oy + Copyright (c) 2020, 2025, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -834,7 +834,8 @@ wsrep_server_gtid_t new_gtid; new_gtid.domain_id= wsrep_gtid_domain_id; new_gtid.server_id= global_system_variables.server_id; - new_gtid.seqno= 0; + /* Use seqno which was recovered in wsrep_init_gtid() */ + new_gtid.seqno= wsrep_gtid_server.seqno(); /* Try to search for domain_id and server_id combination in binlog if found continue from last seqno */ wsrep_get_binlog_gtid_seqno(new_gtid); wsrep_gtid_server.gtid(new_gtid); @@ -867,12 +868,13 @@ wsrep_init_position(); wsrep_sst_auth_init(); - if (strlen(wsrep_provider)== 0 || - !strcmp(wsrep_provider, WSREP_NONE)) + if (!*wsrep_provider || + !strcasecmp(wsrep_provider, WSREP_NONE)) { // enable normal operation in case no provider is specified global_system_variables.wsrep_on= 0; - int err= Wsrep_server_state::instance().load_provider(wsrep_provider, wsrep_provider_options ? wsrep_provider_options : ""); + int err= Wsrep_server_state::instance().load_provider( + wsrep_provider, wsrep_provider_options ? wsrep_provider_options : ""); if (err) { DBUG_PRINT("wsrep",("wsrep::init() failed: %d", err)); @@ -1603,7 +1605,12 @@ This allows autocommit SELECTs and a first SELECT after SET AUTOCOMMIT=0 TODO: modify to check if thd has locked any rows. 
*/ - return thd->wsrep_cs().sync_wait(-1); + if (thd->wsrep_cs().sync_wait(-1)) + { + wsrep_override_error(thd, thd->wsrep_cs().current_error(), + thd->wsrep_cs().current_error_status()); + return true; + } } return false; @@ -2489,50 +2496,48 @@ /* Forward declarations. */ int wsrep_create_trigger_query(THD *thd, uchar** buf, size_t* buf_len); -bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list) -{ - if (WSREP(thd)) - { - for (const TABLE_LIST* it= table_list; it; it= it->next_global) - { - if (it->table && - !wsrep_should_replicate_ddl(thd, it->table->s->db_type())) - return false; - } - } - return true; -} +/*! Should DDL be replicated by Galera + * + * @param thd thread handle + * @param hton real storage engine handlerton + * + * @retval true if we should replicate DDL, false if not */ bool wsrep_should_replicate_ddl(THD* thd, const handlerton *hton) { if (!wsrep_check_mode(WSREP_MODE_STRICT_REPLICATION)) return true; - if (!hton) - return true; + DBUG_ASSERT(hton != nullptr); switch (hton->db_type) { + case DB_TYPE_UNKNOWN: + /* Special pseudo-handlertons (such as 10.6+ JSON tables). */ + return true; + break; case DB_TYPE_INNODB: return true; break; case DB_TYPE_MYISAM: if (wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM)) return true; - else - WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); break; case DB_TYPE_ARIA: if (wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA)) - return true; - else - WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); + return true; + break; + case DB_TYPE_PARTITION_DB: + /* In most cases this means we could not find out + table->file->partition_ht() */ + return true; break; default: - WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); break; } + WSREP_DEBUG("wsrep OSU failed for %s", wsrep_thd_query(thd)); + /* wsrep_mode = STRICT_REPLICATION, treat as error */ my_error(ER_GALERA_REPLICATION_NOT_SUPPORTED, MYF(0)); push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, @@ -2542,6 +2547,26 @@ ha_resolve_storage_engine_name(hton)); return false; } + +bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list) +{ + for (const TABLE_LIST* it= table_list; it; it= it->next_global) + { + const TABLE* table= it->table; + if (table && !it->table_function) + { + /* If this is partitioned table we need to find out + implementing storage engine handlerton. + */ + const handlerton *ht= table->file->partition_ht(); + if (!ht) ht= table->s->db_type(); + if (!wsrep_should_replicate_ddl(thd, ht)) + return false; + } + } + return true; +} + /* Decide if statement should run in TOI. 
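The reworked wsrep_should_replicate_ddl() above now asserts a non-null handlerton and whitelists DB_TYPE_UNKNOWN pseudo-handlertons and DB_TYPE_PARTITION_DB, while wsrep_should_replicate_ddl_iterate() first resolves a partitioned table to its implementing engine via partition_ht(). A minimal standalone sketch of that control flow, using stand-in types and booleans in place of MariaDB's handlerton, TABLE_LIST and wsrep_check_mode() (all names prefixed fake_ are invented for illustration):

#include <cstddef>
#include <cstdio>

enum db_type { DB_TYPE_UNKNOWN, DB_TYPE_INNODB, DB_TYPE_MYISAM,
               DB_TYPE_ARIA, DB_TYPE_PARTITION_DB, DB_TYPE_OTHER };

// Stand-in for the server's handlerton; only the engine type matters here.
struct fake_handlerton { db_type type; };

// Stand-in for one element of a TABLE_LIST chain.
struct fake_table
{
  const fake_handlerton *partition_engine; // what partition_ht() would return
  const fake_handlerton *engine;           // what table->s->db_type() would return
};

static bool strict_replication= true; // WSREP_MODE_STRICT_REPLICATION
static bool replicate_myisam= false;  // WSREP_MODE_REPLICATE_MYISAM
static bool replicate_aria= false;    // WSREP_MODE_REPLICATE_ARIA

static bool should_replicate_ddl(const fake_handlerton *hton)
{
  if (!strict_replication)
    return true;
  switch (hton->type)
  {
  case DB_TYPE_UNKNOWN:      // pseudo-handlertons, e.g. table functions
  case DB_TYPE_INNODB:
  case DB_TYPE_PARTITION_DB: // implementing engine could not be resolved
    return true;
  case DB_TYPE_MYISAM:
    return replicate_myisam;
  case DB_TYPE_ARIA:
    return replicate_aria;
  default:                   // real code raises ER_GALERA_REPLICATION_NOT_SUPPORTED
    return false;
  }
}

static bool should_replicate_ddl_iterate(const fake_table *tables, std::size_t n)
{
  for (std::size_t i= 0; i < n; i++)
  {
    // Prefer the engine implementing a partitioned table, as the patched
    // wsrep_should_replicate_ddl_iterate() does, else the table's own engine.
    const fake_handlerton *ht= tables[i].partition_engine
      ? tables[i].partition_engine : tables[i].engine;
    if (!should_replicate_ddl(ht))
      return false;
  }
  return true;
}

int main()
{
  const fake_handlerton innodb{DB_TYPE_INNODB}, myisam{DB_TYPE_MYISAM};
  const fake_table tables[]= { {nullptr, &innodb}, {&myisam, &innodb} };
  std::printf("%s\n", should_replicate_ddl_iterate(tables, 2)
                      ? "replicate" : "reject"); // prints "reject"
}

With the MyISAM mode flag unset, the partitioned MyISAM table in the example rejects the whole statement, mirroring the strict-replication error path.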
@@ -2650,9 +2675,8 @@ if (create_info) { const handlerton *hton= create_info->db_type; - if (!hton) - hton= ha_default_handlerton(thd); + hton= ha_default_handlerton(thd); if (!wsrep_should_replicate_ddl(thd, hton)) return false; } @@ -2787,7 +2811,6 @@ unireg_abort(1); } - /* returns: 0: statement was replicated as TOI @@ -2803,6 +2826,7 @@ DBUG_ASSERT(wsrep_OSU_method_get(thd) == WSREP_OSU_TOI); WSREP_DEBUG("TOI Begin: %s", wsrep_thd_query(thd)); + DEBUG_SYNC(thd, "wsrep_before_toi_begin"); if (wsrep_can_run_in_toi(thd, db, table, table_list, create_info) == false) { @@ -3043,12 +3067,13 @@ const wsrep::key_array *fk_tables, const HA_CREATE_INFO *create_info) { + DEBUG_SYNC(thd, "wsrep_kill_thd_before_enter_toi"); mysql_mutex_lock(&thd->LOCK_thd_kill); const killed_state killed = thd->killed; mysql_mutex_unlock(&thd->LOCK_thd_kill); if (killed) { - DBUG_ASSERT(FALSE); + /* The thread may have been killed as a result of memory pressure. */ return -1; } @@ -3217,29 +3242,28 @@ @param requestor_ctx The MDL context of the requestor @param ticket MDL ticket for the requested lock + @param key The key of the object (data) being protected - @retval TRUE Lock request can be granted - @retval FALSE Lock request cannot be granted */ - void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx, const MDL_ticket *ticket, const MDL_key *key) { THD *request_thd= requestor_ctx->get_thd(); - THD *granted_thd= ticket->get_ctx()->get_thd(); /* Fallback to the non-wsrep behaviour */ if (!WSREP(request_thd)) return; - const char* schema= key->db_name(); - int schema_len= key->db_name_length(); - mysql_mutex_lock(&request_thd->LOCK_thd_data); if (wsrep_thd_is_toi(request_thd) || wsrep_thd_is_applying(request_thd)) { + THD *granted_thd= ticket->get_ctx()->get_thd(); + + const char* schema= key->db_name(); + int schema_len= key->db_name_length(); + WSREP_DEBUG("wsrep_handle_mdl_conflict request TOI/APPLY for %s", wsrep_thd_query(request_thd)); THD_STAGE_INFO(request_thd, stage_waiting_isolation); @@ -3259,7 +3283,6 @@ /* Here we will call wsrep_abort_transaction so we should hold THD::LOCK_thd_data to protect victim from concurrent usage and THD::LOCK_thd_kill to protect from disconnect or delete. - */ mysql_mutex_lock(&granted_thd->LOCK_thd_kill); mysql_mutex_lock(&granted_thd->LOCK_thd_data); @@ -3303,16 +3326,21 @@ (granted_thd->system_thread != NON_SYSTEM_THREAD && granted_thd->mdl_context.has_explicit_locks())) { - WSREP_DEBUG("BF thread waiting for FLUSH for %s", - wsrep_thd_query(request_thd)); - THD_STAGE_INFO(request_thd, stage_waiting_ddl); + WSREP_DEBUG("BF thread waiting for %s", + granted_thd->lex->sql_command == SQLCOM_FLUSH ? 
"FLUSH" : "BACKUP"); ticket->wsrep_report(wsrep_debug); + if (granted_thd->current_backup_stage != BACKUP_FINISHED && wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP)) { wsrep_abort_thd(request_thd, granted_thd, 1); } } + else if (granted_thd->lex->sql_command == SQLCOM_LOCK_TABLES) + { + WSREP_DEBUG("BF thread waiting for LOCK TABLES"); + ticket->wsrep_report(wsrep_debug); + } else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE) { WSREP_DEBUG("DROP caused BF abort, conf %s for %s", diff -Nru mariadb-10.11.11/sql/wsrep_mysqld.h mariadb-10.11.13/sql/wsrep_mysqld.h --- mariadb-10.11.11/sql/wsrep_mysqld.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_mysqld.h 2025-05-19 16:14:25.000000000 +0000 @@ -356,7 +356,7 @@ const wsrep::key_array *fk_tables= nullptr, const HA_CREATE_INFO* create_info= nullptr); -bool wsrep_should_replicate_ddl(THD* thd, const handlerton *db_type); +bool wsrep_should_replicate_ddl(THD* thd, const handlerton *hton); bool wsrep_should_replicate_ddl_iterate(THD* thd, const TABLE_LIST* table_list); void wsrep_to_isolation_end(THD *thd); @@ -615,7 +615,6 @@ #define wsrep_thr_deinit() do {} while(0) #define wsrep_init_globals() do {} while(0) #define wsrep_create_appliers(X) do {} while(0) -#define wsrep_should_replicate_ddl(X,Y) (1) #define wsrep_cluster_address_exists() (false) #define WSREP_MYSQL_DB (0) #define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) do { } while(0) diff -Nru mariadb-10.11.11/sql/wsrep_server_service.cc mariadb-10.11.13/sql/wsrep_server_service.cc --- mariadb-10.11.11/sql/wsrep_server_service.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_server_service.cc 2025-05-19 16:14:25.000000000 +0000 @@ -192,6 +192,7 @@ break; case wsrep::log::unknown: WSREP_UNKNOWN("%s", message); + assert(0); break; } } diff -Nru mariadb-10.11.11/sql/wsrep_sst.cc mariadb-10.11.13/sql/wsrep_sst.cc --- mariadb-10.11.11/sql/wsrep_sst.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_sst.cc 2025-05-19 16:14:25.000000000 +0000 @@ -464,7 +464,7 @@ if (WSREP_ON) { int const rcode(seqno < 0 ? seqno : 0); - error= wsrep_sst_complete(thd,rcode, sst_gtid); + error= wsrep_sst_complete(thd, rcode, sst_gtid); } return error; @@ -1977,6 +1977,15 @@ wsrep::seqno(err ? 
wsrep::seqno::undefined() : wsrep::seqno(ret_seqno))); +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_sst_donor_after_donation", { + const char act[]= "now " + "SIGNAL sync.wsrep_sst_donor_after_donation_reached " + "WAIT_FOR signal.wsrep_sst_donor_after_donation_continue"; + DBUG_ASSERT(!debug_sync_set_action(thd.ptr, STRING_WITH_LEN(act))); + }); +#endif /* ENABLED_DEBUG_SYNC */ + Wsrep_server_state::instance().sst_sent(gtid, err); proc.wait(); diff -Nru mariadb-10.11.11/sql/wsrep_thd.h mariadb-10.11.13/sql/wsrep_thd.h --- mariadb-10.11.11/sql/wsrep_thd.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_thd.h 2025-05-19 16:14:25.000000000 +0000 @@ -237,25 +237,13 @@ wsrep::client_error ce, enum wsrep::provider::status status) { - DBUG_ASSERT(ce != wsrep::e_success); - switch (ce) - { - case wsrep::e_error_during_commit: - if (status == wsrep::provider::error_size_exceeded) - wsrep_override_error(thd, ER_UNKNOWN_ERROR, "Maximum writeset size exceeded"); - else - wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, 0, status); - break; - case wsrep::e_deadlock_error: - wsrep_override_error(thd, ER_LOCK_DEADLOCK); - break; - case wsrep::e_interrupted_error: - wsrep_override_error(thd, ER_QUERY_INTERRUPTED); - break; - case wsrep::e_size_exceeded_error: + DBUG_ASSERT(ce != wsrep::e_success); + switch (ce) + { + case wsrep::e_error_during_commit: + if (status == wsrep::provider::error_size_exceeded) wsrep_override_error(thd, ER_UNKNOWN_ERROR, "Maximum writeset size exceeded"); - break; - case wsrep::e_append_fragment_error: + else /* TODO: Figure out better error number */ if (status) wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, @@ -265,17 +253,45 @@ else wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, "Error while appending streaming replication fragment"); - break; - case wsrep::e_not_supported_error: - wsrep_override_error(thd, ER_NOT_SUPPORTED_YET); - break; - case wsrep::e_timeout_error: - wsrep_override_error(thd, ER_LOCK_WAIT_TIMEOUT); + break; + case wsrep::e_deadlock_error: + switch (thd->lex->sql_command) + { + case SQLCOM_XA_END: + case SQLCOM_XA_PREPARE: + wsrep_override_error(thd, ER_XA_RBDEADLOCK); break; default: - wsrep_override_error(thd, ER_UNKNOWN_ERROR); + wsrep_override_error(thd, ER_LOCK_DEADLOCK); break; } + break; + case wsrep::e_interrupted_error: + wsrep_override_error(thd, ER_QUERY_INTERRUPTED); + break; + case wsrep::e_size_exceeded_error: + wsrep_override_error(thd, ER_UNKNOWN_ERROR, "Maximum writeset size exceeded"); + break; + case wsrep::e_append_fragment_error: + /* TODO: Figure out better error number */ + if (status) + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, + "Error while appending streaming replication fragment" + "(provider status: %s)", + wsrep::provider::to_string(status).c_str()); + else + wsrep_override_error(thd, ER_ERROR_DURING_COMMIT, + "Error while appending streaming replication fragment"); + break; + case wsrep::e_not_supported_error: + wsrep_override_error(thd, ER_NOT_SUPPORTED_YET); + break; + case wsrep::e_timeout_error: + wsrep_override_error(thd, ER_LOCK_WAIT_TIMEOUT); + break; + default: + wsrep_override_error(thd, ER_UNKNOWN_ERROR); + } } /** diff -Nru mariadb-10.11.11/sql/wsrep_trans_observer.h mariadb-10.11.13/sql/wsrep_trans_observer.h --- mariadb-10.11.11/sql/wsrep_trans_observer.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_trans_observer.h 2025-05-19 16:14:25.000000000 +0000 @@ -1,4 +1,4 @@ -/* Copyright 2016-2023 Codership Oy +/* Copyright 2016-2025 Codership Oy This 
program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -265,12 +265,17 @@ { DBUG_RETURN(ret); } + if ((ret= thd->wsrep_cs().before_prepare()) == 0) { DBUG_ASSERT(!thd->wsrep_trx().ws_meta().gtid().is_undefined()); + /* Here we init xid with UUID and wsrep seqno. GTID is + set to undefined because commit order is decided later + in wsrep_before_commit(). wsrep_before_prepare() is + executed out of order. */ wsrep_xid_init(&thd->wsrep_xid, thd->wsrep_trx().ws_meta().gtid(), - wsrep_gtid_server.gtid()); + wsrep_gtid_server.undefined()); } mysql_mutex_lock(&thd->LOCK_thd_kill); @@ -472,12 +477,6 @@ int wsrep_after_statement(THD* thd) { DBUG_ENTER("wsrep_after_statement"); - WSREP_DEBUG("wsrep_after_statement for %lu client_state %s " - " client_mode %s trans_state %s", - thd_get_thread_id(thd), - wsrep::to_c_string(thd->wsrep_cs().state()), - wsrep::to_c_string(thd->wsrep_cs().mode()), - wsrep::to_c_string(thd->wsrep_cs().transaction().state())); int ret= ((thd->wsrep_cs().state() != wsrep::client_state::s_none && thd->wsrep_cs().mode() == Wsrep_client_state::m_local) && !thd->internal_transaction() ? diff -Nru mariadb-10.11.11/sql/wsrep_var.cc mariadb-10.11.13/sql/wsrep_var.cc --- mariadb-10.11.11/sql/wsrep_var.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_var.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1,4 +1,4 @@ -/* Copyright 2008-2022 Codership Oy +/* Copyright 2008-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -353,14 +353,12 @@ var->save_result.string_value.length); start_pos_buf[var->save_result.string_value.length]= 0; - WSREP_DEBUG("SST wsrep_start_position check for new position %s old %s", - start_pos_buf, wsrep_start_position); + start_pos_buf, wsrep_start_position); // Verify the format. if (wsrep_start_position_verify(start_pos_buf)) return true; - // Give error if position is updated when wsrep is not enabled or // provider is not loaded. 
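The hunk above delegates format validation of a new wsrep_start_position value to wsrep_start_position_verify(), whose body is outside this diff. As a rough sketch of such a validation, the following standalone function accepts the "<uuid>:<seqno>" form that wsrep_start_position documents (for example the default 00000000-0000-0000-0000-000000000000:-1); the function name and its acceptance of negative seqno values are assumptions made here, not code from this patch:

#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Hypothetical validator mirroring the "<uuid>:<seqno>" shape; not the
// actual body of wsrep_start_position_verify().
static bool start_position_verify(const char *pos)
{
  static const char shape[]= "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
  const std::size_t uuid_len= sizeof shape - 1;        // 36 characters
  if (std::strlen(pos) < uuid_len + 2 || pos[uuid_len] != ':')
    return false;                // need "<uuid>:" plus at least one digit
  for (std::size_t i= 0; i < uuid_len; i++)
    if (shape[i] == '-' ? pos[i] != '-'
                        : !std::isxdigit((unsigned char) pos[i]))
      return false;
  char *end;
  (void) std::strtoll(pos + uuid_len + 1, &end, 10);   // seqno; -1 = undefined
  return *end == '\0';
}

int main()
{
  std::printf("%d\n", start_position_verify(
      "00000000-0000-0000-0000-000000000000:-1"));     // prints 1
}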
if ((!WSREP_ON || !Wsrep_server_state::instance().is_provider_loaded()) @@ -667,7 +665,7 @@ { wsrep_create_rollbacker(); WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu", - wsrep_slave_threads, wsrep_running_applier_threads); + wsrep_slave_threads, wsrep_running_applier_threads); wsrep_create_appliers(wsrep_slave_threads); } mysql_mutex_unlock(&LOCK_wsrep_cluster_config); @@ -771,7 +769,7 @@ { wsrep_slave_count_change = (wsrep_slave_threads - wsrep_running_applier_threads); WSREP_DEBUG("Change on slave threads: New %ld old %lu difference %d", - wsrep_slave_threads, wsrep_running_applier_threads, wsrep_slave_count_change); + wsrep_slave_threads, wsrep_running_applier_threads, wsrep_slave_count_change); } bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) @@ -796,9 +794,9 @@ // Thread creation and execution is asyncronous, therefore we need // wait them to be started or error produced while (wsrep_running_applier_threads != (ulong)wsrep_slave_threads && - !wsrep_thread_create_failed.load(std::memory_order_relaxed)) + !wsrep_thread_create_failed.load(std::memory_order_relaxed)) { - my_sleep(1000); + my_sleep(1000); } mysql_mutex_lock(&LOCK_global_system_variables); @@ -987,6 +985,22 @@ bool wsrep_mode_check(sys_var *self, THD* thd, set_var* var) { + ulonglong new_wsrep_mode= var->save_result.ulonglong_value; + ulonglong old_wsrep_mode= wsrep_mode; + wsrep_mode= new_wsrep_mode; + if (wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM) || + wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA)) + { + if (!(wsrep_forced_binlog_format == BINLOG_FORMAT_UNSPEC || + wsrep_forced_binlog_format == BINLOG_FORMAT_ROW)) + { + my_message(ER_WRONG_ARGUMENTS, "wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA] " + "can't be enabled if wsrep_forced_binlog != [NONE|ROW]", MYF(0)); + wsrep_mode= old_wsrep_mode; + return true; + } + } + wsrep_mode= old_wsrep_mode; return false; } @@ -1130,3 +1144,28 @@ return false; } +bool wsrep_forced_binlog_format_check(sys_var *self, THD* thd, set_var* var) +{ + ulonglong new_forced_binlog_format= var->save_result.ulonglong_value; + + if (!(new_forced_binlog_format == BINLOG_FORMAT_UNSPEC || + new_forced_binlog_format == BINLOG_FORMAT_ROW)) + { + if (wsrep_check_mode(WSREP_MODE_BINLOG_ROW_FORMAT_ONLY)) + { + my_message(ER_WRONG_ARGUMENTS, "wsrep_forced_binlog_format=[MIXED|STATEMENT] can't be set " + "if wsrep_mode=BINLOG_ROW_FORMAT_ONLY", MYF(0)); + return true; + } + + if (wsrep_check_mode(WSREP_MODE_REPLICATE_MYISAM) || + wsrep_check_mode(WSREP_MODE_REPLICATE_ARIA)) + { + my_message(ER_WRONG_ARGUMENTS, "wsrep_forced_binlog_format=[MIXED|STATEMENT] can't be set " + "if wsrep_mode=[REPLICATE_MYISAM|REPLICATE_ARIA]", MYF(0)); + return true; + } + } + + return false; +} diff -Nru mariadb-10.11.11/sql/wsrep_var.h mariadb-10.11.13/sql/wsrep_var.h --- mariadb-10.11.11/sql/wsrep_var.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_var.h 2025-05-19 16:14:25.000000000 +0000 @@ -1,4 +1,4 @@ -/* Copyright (C) 2013-2021 Codership Oy +/* Copyright (C) 2013-2023 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -110,6 +110,7 @@ extern bool wsrep_gtid_domain_id_update UPDATE_ARGS; extern bool wsrep_mode_check CHECK_ARGS; +extern bool wsrep_forced_binlog_format_check CHECK_ARGS; #else /* WITH_WSREP */ #define wsrep_provider_init(X) diff -Nru mariadb-10.11.11/sql/wsrep_xid.cc mariadb-10.11.13/sql/wsrep_xid.cc --- 
mariadb-10.11.11/sql/wsrep_xid.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_xid.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1,4 +1,4 @@ -/* Copyright 2015 Codership Oy +/* Copyright 2015-2025 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,6 +24,8 @@ #include #include <algorithm> /* std::sort() */ +#include <string> /* std::string */ +#include <sstream> /* std::stringstream */ /* * WSREPXid */ @@ -119,11 +121,7 @@ if (hton->set_checkpoint) { - const unsigned char* uuid= wsrep_xid_uuid(xid); - char uuid_str[40]= {0, }; - wsrep_uuid_print((const wsrep_uuid_t*)uuid, uuid_str, sizeof(uuid_str)); - WSREP_DEBUG("Set WSREPXid for InnoDB: %s:%lld", - uuid_str, (long long)wsrep_xid_seqno(xid)); + WSREP_DEBUG("Set WSREPXid for InnoDB: %s", wsrep_xid_print(xid).c_str()); hton->set_checkpoint(hton, xid); } return FALSE; @@ -150,12 +148,7 @@ if (hton->get_checkpoint) { hton->get_checkpoint(hton, xid); - wsrep_uuid_t uuid; - memcpy(&uuid, wsrep_xid_uuid(xid), sizeof(uuid)); - char uuid_str[40]= {0, }; - wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str)); - WSREP_DEBUG("Read WSREPXid from InnoDB: %s:%lld", - uuid_str, (long long)wsrep_xid_seqno(xid)); + WSREP_DEBUG("Read WSREPXid from InnoDB: %s", wsrep_xid_print(xid).c_str()); } return FALSE; } @@ -252,3 +245,29 @@ { std::sort(array, array + len, Wsrep_xid_cmp()); } + +std::string wsrep_xid_print(const XID *xid) +{ + std::stringstream ss; + const unsigned char* uuid= wsrep_xid_uuid(xid); + char uuid_str[40]= {0, }; + wsrep_uuid_print((const wsrep_uuid_t*)uuid, uuid_str, sizeof(uuid_str)); + wsrep_server_gtid_t gtid= {0,0,0}; + memcpy(&gtid, &xid->data[WSREP_XID_RPL_GTID_OFFSET], sizeof(wsrep_server_gtid_t)); + ss << uuid_str << ":" << wsrep_xid_seqno(xid) << " " << gtid.domain_id << "-" + << gtid.server_id << "-" << gtid.seqno; + return ss.str(); +} + +bool wsrep_is_xid_gtid_undefined(const XID *xid) +{ + wsrep_server_gtid_t gtid= {0,0,0}; + + if (wsrep_is_wsrep_xid(xid) && + xid->data[WSREP_XID_VERSION_OFFSET] == WSREP_XID_VERSION_3) + { + memcpy(&gtid, &xid->data[WSREP_XID_RPL_GTID_OFFSET], sizeof(wsrep_server_gtid_t)); + } + + return (gtid.seqno == 0 && gtid.server_id == 0 && gtid.domain_id == 0); +} diff -Nru mariadb-10.11.11/sql/wsrep_xid.h mariadb-10.11.13/sql/wsrep_xid.h --- mariadb-10.11.11/sql/wsrep_xid.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/sql/wsrep_xid.h 2025-05-19 16:14:25.000000000 +0000 @@ -1,4 +1,4 @@ -/* Copyright (C) 2015 Codership Oy +/* Copyright (C) 2015-2025 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,6 +34,8 @@ //void wsrep_set_SE_checkpoint(XID&); /* uncomment if needed */ void wsrep_sort_xid_array(XID *array, int len); +std::string wsrep_xid_print(const XID *xid); +bool wsrep_is_xid_gtid_undefined(const XID *xid); #endif /* WITH_WSREP */ #endif /* WSREP_UTILS_H */ diff -Nru mariadb-10.11.11/sql/yy_mariadb.cc mariadb-10.11.13/sql/yy_mariadb.cc --- mariadb-10.11.11/sql/yy_mariadb.cc 2025-01-30 11:01:27.000000000 +0000 +++ mariadb-10.11.13/sql/yy_mariadb.cc 2025-05-19 16:14:28.000000000 +0000 @@ -39851,7 +39851,7 @@ if ((yyvsp[0].lex_str).str) { if (unlikely(Lex->sql_command == SQLCOM_CREATE_VIEW && - check_column_name((yyvsp[0].lex_str).str))) + check_column_name((yyvsp[0].lex_str)))) my_yyabort_error((ER_WRONG_COLUMN_NAME, MYF(0), (yyvsp[0].lex_str).str)); (yyvsp[-2].item)->base_flags|=
item_base_t::IS_EXPLICIT_NAME; (yyvsp[-2].item)->set_name(thd, (yyvsp[0].lex_str)); diff -Nru mariadb-10.11.11/sql/yy_oracle.cc mariadb-10.11.13/sql/yy_oracle.cc --- mariadb-10.11.11/sql/yy_oracle.cc 2025-01-30 11:01:27.000000000 +0000 +++ mariadb-10.11.13/sql/yy_oracle.cc 2025-05-19 16:14:28.000000000 +0000 @@ -39022,7 +39022,7 @@ if ((yyvsp[0].lex_str).str) { if (unlikely(Lex->sql_command == SQLCOM_CREATE_VIEW && - check_column_name((yyvsp[0].lex_str).str))) + check_column_name((yyvsp[0].lex_str)))) my_yyabort_error((ER_WRONG_COLUMN_NAME, MYF(0), (yyvsp[0].lex_str).str)); (yyvsp[-2].item)->base_flags|= item_base_t::IS_EXPLICIT_NAME; (yyvsp[-2].item)->set_name(thd, (yyvsp[0].lex_str)); diff -Nru mariadb-10.11.11/storage/connect/CMakeLists.txt mariadb-10.11.13/storage/connect/CMakeLists.txt --- mariadb-10.11.11/storage/connect/CMakeLists.txt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/connect/CMakeLists.txt 2025-05-19 16:14:25.000000000 +0000 @@ -413,14 +413,16 @@ RETURN() ENDIF() -IF(MSVC AND (CMAKE_CXX_FLAGS MATCHES "/MP")) +IF(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # domdoc.cpp uses compiler directive #import which is not compatible # with the /MP option, resulting in compiler error C2813. # Remove /MP for this file. + GET_TARGET_PROPERTY(CURRENT_COMPILE_OPTIONS connect COMPILE_OPTIONS) + LIST(REMOVE_ITEM CURRENT_COMPILE_OPTIONS "$<$:/MP>") + SET_TARGET_PROPERTIES(connect PROPERTIES COMPILE_OPTIONS "${CURRENT_COMPILE_OPTIONS}") SET(src_list ${CONNECT_SOURCES}) LIST(FIND src_list domdoc.cpp idx) IF(idx GREATER -1) - STRING(REPLACE "/MP" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") LIST(REMOVE_AT src_list ${idx}) SET_SOURCE_FILES_PROPERTIES(${src_list} PROPERTIES COMPILE_FLAGS "/MP") ENDIF() diff -Nru mariadb-10.11.11/storage/connect/connect.cc mariadb-10.11.13/storage/connect/connect.cc --- mariadb-10.11.11/storage/connect/connect.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/connect/connect.cc 2025-05-19 16:14:25.000000000 +0000 @@ -92,11 +92,11 @@ free(dbuserp); - if (trace(1)) - htrc("CntEndDB: Freeing Dup\n"); + if (trace(1)) + htrc("CntEndDB: Freeing Dup\n"); - g->Activityp->Aptr = NULL; - } // endif dbuserp + g->Activityp->Aptr = NULL; // Free PlgGetUser() data + } // endif dbuserp } // end of CntEndDB diff -Nru mariadb-10.11.11/storage/connect/plgxml.h mariadb-10.11.13/storage/connect/plgxml.h --- mariadb-10.11.11/storage/connect/plgxml.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/connect/plgxml.h 2025-05-19 16:14:25.000000000 +0000 @@ -5,7 +5,7 @@ /******************************************************************/ /* Dual XML implementation base classes defines. 
*/ /******************************************************************/ -#if !defined(BASE_BUFFER_SIZE) +#ifndef LIBXML2_SUPPORT enum ElementType { // libxml2 XML_ELEMENT_NODE = 1, XML_ATTRIBUTE_NODE = 2, @@ -28,7 +28,7 @@ XML_XINCLUDE_START = 19, XML_XINCLUDE_END = 20, XML_DOCB_DOCUMENT_NODE = 21}; -#endif // !BASE_BUFFER_SIZE +#endif //#if !defined(NODE_TYPE_LIST) #ifdef NOT_USED diff -Nru mariadb-10.11.11/storage/connect/tabxml.cpp mariadb-10.11.13/storage/connect/tabxml.cpp --- mariadb-10.11.11/storage/connect/tabxml.cpp 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/connect/tabxml.cpp 2025-05-19 16:14:25.000000000 +0000 @@ -25,6 +25,9 @@ #include #include //#include +#ifdef LIBXML2_SUPPORT +#include +#endif #include "osutil.h" #define _O_RDONLY O_RDONLY #endif // !_WIN32 diff -Nru mariadb-10.11.11/storage/connect/user_connect.cc mariadb-10.11.13/storage/connect/user_connect.cc --- mariadb-10.11.11/storage/connect/user_connect.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/connect/user_connect.cc 2025-05-19 16:14:25.000000000 +0000 @@ -101,9 +101,6 @@ PACTIVITY ap= NULL; PDBUSER dup= NULL; - // Areasize= 64M because of VEC tables. Should be parameterisable -//g= PlugInit(NULL, 67108864); -//g= PlugInit(NULL, 134217728); // 128M was because of old embedded tests g= PlugInit(NULL, (size_t)worksize); // Check whether the initialization is complete @@ -113,12 +110,13 @@ printf("%s\n", g->Message); (void) PlugExit(g); + g= 0; - if (dup) - free(dup); + if (dup) + free(dup); return true; - } // endif g-> + } // endif g-> dup->Catalog= new MYCAT(NULL); @@ -128,17 +126,16 @@ g->Activityp= ap; g->Activityp->Aptr= dup; - pthread_mutex_lock(&usrmut); + pthread_mutex_lock(&usrmut); next= to_users; to_users= this; if (next) next->previous= this; - count = 1; - pthread_mutex_unlock(&usrmut); - - last_query_id= thdp->query_id; + count = 1; + pthread_mutex_unlock(&usrmut); + last_query_id= thdp->query_id; return false; } // end of user_init diff -Nru mariadb-10.11.11/storage/federatedx/federatedx_io.cc mariadb-10.11.13/storage/federatedx/federatedx_io.cc --- mariadb-10.11.11/storage/federatedx/federatedx_io.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/federatedx/federatedx_io.cc 2025-05-19 16:14:25.000000000 +0000 @@ -51,6 +51,7 @@ static const io_schemes_st federated_io_schemes[] = { { "mysql", &instantiate_io_mysql }, + { "mariadb", &instantiate_io_mysql }, { "null", instantiate_io_null } /* must be last element */ }; diff -Nru mariadb-10.11.11/storage/federatedx/ha_federatedx.cc mariadb-10.11.13/storage/federatedx/ha_federatedx.cc --- mariadb-10.11.11/storage/federatedx/ha_federatedx.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/federatedx/ha_federatedx.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1484,20 +1484,20 @@ sizeof(int) + 8); key.append(scheme); key.q_append('\0'); - server->hostname= (const char *) (intptr) key.length(); + size_t hostname_pos= key.length(); key.append(hostname); key.q_append('\0'); - server->database= (const char *) (intptr) key.length(); + size_t database_pos= key.length(); key.append(database); key.q_append('\0'); key.q_append((uint32) share->port); - server->socket= (const char *) (intptr) key.length(); + size_t socket_pos= key.length(); key.append(socket); key.q_append('\0'); - server->username= (const char *) (intptr) key.length(); + size_t username_pos= key.length(); key.append(username); key.q_append('\0'); - server->password= (const char *) (intptr) key.length(); + size_t 
password_pos= key.length(); key.append(password); key.c_ptr_safe(); // Ensure we have end \0 @@ -1505,13 +1505,12 @@ /* Copy and add end \0 */ server->key= (uchar *) strmake_root(mem_root, key.ptr(), key.length()); - /* pointer magic */ - server->scheme+= (intptr) server->key; - server->hostname+= (intptr) server->key; - server->database+= (intptr) server->key; - server->username+= (intptr) server->key; - server->password+= (intptr) server->key; - server->socket+= (intptr) server->key; + server->scheme= (const char *)server->key; + server->hostname= (const char *)server->key + hostname_pos; + server->database= (const char *)server->key + database_pos; + server->username= (const char *)server->key + username_pos; + server->password= (const char *)server->key + password_pos; + server->socket= (const char*)server->key + socket_pos; server->port= share->port; if (!share->socket) diff -Nru mariadb-10.11.11/storage/innobase/CMakeLists.txt mariadb-10.11.13/storage/innobase/CMakeLists.txt --- mariadb-10.11.11/storage/innobase/CMakeLists.txt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/CMakeLists.txt 2025-05-19 16:14:25.000000000 +0000 @@ -226,7 +226,6 @@ include/dict0pagecompress.h include/dict0pagecompress.inl include/dict0stats.h - include/dict0stats.inl include/dict0stats_bg.h include/dict0types.h include/dyn0buf.h diff -Nru mariadb-10.11.11/storage/innobase/btr/btr0sea.cc mariadb-10.11.13/storage/innobase/btr/btr0sea.cc --- mariadb-10.11.11/storage/innobase/btr/btr0sea.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/btr/btr0sea.cc 2025-05-19 16:14:25.000000000 +0000 @@ -195,7 +195,7 @@ } /** Lazily free detached metadata when removing the last reference. */ -ATTRIBUTE_COLD static void btr_search_lazy_free(dict_index_t *index) +ATTRIBUTE_COLD void btr_search_lazy_free(dict_index_t *index) { ut_ad(index->freed()); dict_table_t *table= index->table; @@ -219,8 +219,7 @@ table->autoinc_mutex.wr_unlock(); } -/** Disable the adaptive hash search system and empty the index. */ -void btr_search_disable() +ATTRIBUTE_COLD bool btr_search_disable() { dict_table_t* table; @@ -231,7 +230,7 @@ if (!btr_search_enabled) { dict_sys.unfreeze(); btr_search_x_unlock_all(); - return; + return false; } btr_search_enabled = false; @@ -259,23 +258,25 @@ btr_search_sys.clear(); btr_search_x_unlock_all(); + + return true; } /** Enable the adaptive hash search system. @param resize whether buf_pool_t::resize() is the caller */ -void btr_search_enable(bool resize) +ATTRIBUTE_COLD void btr_search_enable(bool resize) { if (!resize) { mysql_mutex_lock(&buf_pool.mutex); - bool changed = srv_buf_pool_old_size != srv_buf_pool_size; + const auto is_shrinking = buf_pool.is_shrinking(); mysql_mutex_unlock(&buf_pool.mutex); - if (changed) { + if (is_shrinking) { return; } } btr_search_x_lock_all(); - ulint hash_size = buf_pool_get_curr_size() / sizeof(void *) / 64; + ulint hash_size = buf_pool.curr_pool_size() / sizeof(void *) / 64; if (btr_search_sys.parts[0].heap) { ut_ad(btr_search_enabled); @@ -939,88 +940,6 @@ info->last_hash_succ = FALSE; } -/** Clear the adaptive hash index on all pages in the buffer pool. 
*/ -inline void buf_pool_t::clear_hash_index() noexcept -{ - ut_ad(!resizing); - ut_ad(!btr_search_enabled); - - std::set<dict_index_t*> garbage; - - for (chunk_t *chunk= chunks + n_chunks; chunk-- != chunks; ) - { - for (buf_block_t *block= chunk->blocks, * const end= block + chunk->size; - block != end; block++) - { - dict_index_t *index= block->index; - assert_block_ahi_valid(block); - - /* We can clear block->index and block->n_pointers when - holding all AHI latches exclusively; see the comments in buf0buf.h */ - - if (!index) - { -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(!block->n_pointers); -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - continue; - } - - ut_d(const auto s= block->page.state()); - /* Another thread may have set the state to - REMOVE_HASH in buf_LRU_block_remove_hashed(). - - The state change in buf_pool_t::realloc() is not observable - here, because in that case we would have !block->index. - - In the end, the entire adaptive hash index will be removed. */ - ut_ad(s >= buf_page_t::UNFIXED || s == buf_page_t::REMOVE_HASH); -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - block->n_pointers= 0; -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - if (index->freed()) - garbage.insert(index); - block->index= nullptr; - } - } - - for (dict_index_t *index : garbage) - btr_search_lazy_free(index); -} - -/** Get a buffer block from an adaptive hash index pointer. -This function does not return if the block is not identified. -@param ptr pointer to within a page frame -@return pointer to block, never NULL */ -inline buf_block_t* buf_pool_t::block_from_ahi(const byte *ptr) const noexcept -{ - chunk_t::map *chunk_map = chunk_t::map_ref; - ut_ad(chunk_t::map_ref == chunk_t::map_reg); - ut_ad(!resizing); - - chunk_t::map::const_iterator it= chunk_map->upper_bound(ptr); - ut_a(it != chunk_map->begin()); - - chunk_t *chunk= it == chunk_map->end() - ? chunk_map->rbegin()->second - : (--it)->second; - - const size_t offs= size_t(ptr - chunk->blocks->page.frame) >> - srv_page_size_shift; - ut_a(offs < chunk->size); - - buf_block_t *block= &chunk->blocks[offs]; - /* buf_pool_t::chunk_t::init() invokes buf_block_init() so that - block[n].frame == block->page.frame + n * srv_page_size. Check it. */ - ut_ad(block->page.frame == page_align(ptr)); - /* Read the state of the block without holding hash_lock. - A state transition to REMOVE_HASH is possible during - this execution. */ - ut_ad(block->page.state() >= buf_page_t::REMOVE_HASH); - - return block; -} - /** Tries to guess the right search position based on the hash search info of the index.
Note that if mode is PAGE_CUR_LE, which is used in inserts, and the function returns TRUE, then cursor->up_match and cursor->low_match @@ -1103,7 +1022,8 @@ return false; } - buf_block_t* block = buf_pool.block_from_ahi(rec); + buf_block_t* block = buf_pool.block_from(rec); + ut_ad(block->page.frame == page_align(rec)); buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get( block->page.id().fold()); @@ -2196,7 +2116,7 @@ for (; node != NULL; node = node->next) { const buf_block_t* block - = buf_pool.block_from_ahi((byte*) node->data); + = buf_pool.block_from(node->data); index_id_t page_index_id; if (UNIV_LIKELY(block->page.in_file())) { diff -Nru mariadb-10.11.11/storage/innobase/buf/buf0buddy.cc mariadb-10.11.13/storage/innobase/buf/buf0buddy.cc --- mariadb-10.11.11/storage/innobase/buf/buf0buddy.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/buf/buf0buddy.cc 2025-05-19 16:14:25.000000000 +0000 @@ -162,6 +162,20 @@ } #ifdef UNIV_DEBUG +const buf_block_t *buf_pool_t::contains_zip(const void *data, size_t shift) + const noexcept +{ + const size_t d= size_t(data) >> shift; + + for (size_t i= 0; i < n_blocks; i++) + { + const buf_block_t *block= get_nth_page(i); + if (size_t(block->page.zip.data) >> shift == d) + return block; + } + return nullptr; +} + /** Validate a given zip_free list. */ struct CheckZipFree { CheckZipFree(ulint i) : m_i(i) {} @@ -257,13 +271,10 @@ /** Add a block to the head of the appropriate buddy free list. @param[in,out] buf block to be freed @param[in] i index of buf_pool.zip_free[] */ -UNIV_INLINE -void -buf_buddy_add_to_free(buf_buddy_free_t* buf, ulint i) +static void buf_buddy_add_to_free(buf_buddy_free_t *buf, ulint i) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(buf_pool.zip_free[i].start != buf); - buf_buddy_stamp_free(buf, i); UT_LIST_ADD_FIRST(buf_pool.zip_free[i], buf); ut_d(buf_buddy_list_validate(i)); @@ -272,9 +283,7 @@ /** Remove a block from the appropriate buddy free list. @param[in,out] buf block to be freed @param[in] i index of buf_pool.zip_free[] */ -UNIV_INLINE -void -buf_buddy_remove_from_free(buf_buddy_free_t* buf, ulint i) +static void buf_buddy_remove_from_free(buf_buddy_free_t *buf, ulint i) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(buf_buddy_check_free(buf, i)); @@ -298,13 +307,10 @@ buf = UT_LIST_GET_FIRST(buf_pool.zip_free[i]); - if (buf_pool.is_shrinking() - && UT_LIST_GET_LEN(buf_pool.withdraw) - < buf_pool.withdraw_target) { - + if (size_t size = buf_pool.shrinking_size()) { while (buf != NULL && buf_pool.will_be_withdrawn( - reinterpret_cast(buf))) { + reinterpret_cast(buf), size)) { /* This should be withdrawn, not to be allocated */ buf = UT_LIST_GET_NEXT(list, buf); } @@ -312,6 +318,7 @@ if (buf) { buf_buddy_remove_from_free(buf, i); + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); } else if (i + 1 < BUF_BUDDY_SIZES) { /* Attempt to split. */ buf = buf_buddy_alloc_zip(i + 1); @@ -321,7 +328,6 @@ reinterpret_cast( reinterpret_cast(buf) + (BUF_BUDDY_LOW << i)); - ut_ad(!buf_pool.contains_zip(buddy)); buf_buddy_add_to_free(buddy, i); } } @@ -340,74 +346,52 @@ return(buf); } +#ifdef UNIV_DEBUG +/** number of blocks allocated to the buddy system */ +static size_t buf_buddy_n_frames; +#endif + /** Deallocate a buffer frame of srv_page_size. 
@param buf buffer frame to deallocate */ static void buf_buddy_block_free(void *buf) noexcept { mysql_mutex_assert_owner(&buf_pool.mutex); - ut_a(!ut_align_offset(buf, srv_page_size)); - - const ulint fold= BUF_POOL_ZIP_FOLD_PTR(buf); - buf_page_t **prev= buf_pool.zip_hash.cell_get(fold)-> - search(&buf_page_t::hash, [buf](const buf_page_t *b) - { - ut_ad(b->in_zip_hash); - ut_ad(b->state() == buf_page_t::MEMORY); - return b->frame == buf; - }); - - buf_page_t *bpage= *prev; - ut_a(bpage); - ut_a(bpage->frame == buf); - ut_d(bpage->in_zip_hash= false); - *prev= bpage->hash; - bpage->hash= nullptr; - + buf_block_t *block= buf_pool.block_from(buf); + ut_ad(block->page.state() == buf_page_t::MEMORY); + ut_ad(block->page.frame == buf); + ut_ad(!buf_pool.contains_zip(buf, srv_page_size_shift)); ut_d(memset(buf, 0, srv_page_size)); MEM_UNDEFINED(buf, srv_page_size); - - buf_LRU_block_free_non_file_page(reinterpret_cast(bpage)); - ut_ad(buf_pool.buddy_n_frames > 0); - ut_d(buf_pool.buddy_n_frames--); + buf_LRU_block_free_non_file_page(block); + ut_ad(buf_buddy_n_frames > 0); + ut_d(buf_buddy_n_frames--); } /** Allocate a buffer block to the buddy allocator. @param block buffer block to register */ static void buf_buddy_block_register(buf_block_t *block) noexcept { - const ulint fold= BUF_POOL_ZIP_FOLD(block); + ut_ad(buf_pool.is_uncompressed_current(block)); ut_ad(block->page.state() == buf_page_t::MEMORY); - - ut_a(block->page.frame); - ut_a(!ut_align_offset(block->page.frame, srv_page_size)); - - ut_ad(!block->page.in_zip_hash); - ut_d(block->page.in_zip_hash= true); - buf_pool.zip_hash.cell_get(fold)->append(block->page, &buf_page_t::hash); - ut_d(buf_pool.buddy_n_frames++); + ut_d(buf_buddy_n_frames++); } /** Allocate a block from a bigger object. @param[in] buf a block that is free to use @param[in] i index of buf_pool.zip_free[] -@param[in] j size of buf as an index of buf_pool.zip_free[] @return allocated block */ -static -void* -buf_buddy_alloc_from(void* buf, ulint i, ulint j) +static void *buf_buddy_alloc_from(void *buf, ulint i) { - ulint offs = BUF_BUDDY_LOW << j; - ut_ad(j <= BUF_BUDDY_SIZES); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - ut_ad(j >= i); - ut_ad(!ut_align_offset(buf, offs)); + ut_ad(i <= BUF_BUDDY_SIZES); + ut_ad(!ut_align_offset(buf, srv_page_size)); + ut_ad(!buf_pool.contains_zip(buf, srv_page_size_shift)); /* Add the unused parts of the block to the free lists. */ - while (j > i) { + for (ulint j = BUF_BUDDY_SIZES, offs = srv_page_size; j-- > i; ) { buf_buddy_free_t* zip_buf; offs >>= 1; - j--; zip_buf = reinterpret_cast( reinterpret_cast(buf) + offs); @@ -422,7 +406,7 @@ @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -byte *buf_buddy_alloc_low(ulint i, bool *lru) +byte *buf_buddy_alloc_low(ulint i, bool *lru) noexcept { buf_block_t* block; @@ -439,7 +423,7 @@ } /* Try allocating from the buf_pool.free list. */ - block = buf_LRU_get_free_only(); + block = buf_pool.allocate(); if (block) { goto alloc_big; @@ -455,21 +439,21 @@ buf_buddy_block_register(block); block = reinterpret_cast( - buf_buddy_alloc_from(block->page.frame, i, BUF_BUDDY_SIZES)); + buf_buddy_alloc_from(block->page.frame, i)); func_exit: buf_pool.buddy_stat[i].used++; return reinterpret_cast(block); } -/** Try to relocate a block. The caller must hold zip_free_mutex, and this -function will release and lock it again. +/** Try to relocate a block. 
@param[in] src block to relocate @param[in] dst free block to relocated to @param[in] i index of buf_pool.zip_free[] @param[in] force true if we must relocated always @return true if relocated */ -static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) +static bool buf_buddy_relocate(void *src, void *dst, ulint i, bool force) + noexcept { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; @@ -575,7 +559,7 @@ @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -void buf_buddy_free_low(void* buf, ulint i) +void buf_buddy_free_low(void* buf, ulint i) noexcept { buf_buddy_free_t* buddy; @@ -595,13 +579,12 @@ ut_ad(i < BUF_BUDDY_SIZES); ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i)); - ut_ad(!buf_pool.contains_zip(buf)); + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); /* Do not recombine blocks if there are few free blocks. We may waste up to 15360*max_len bytes to free blocks (1024 + 2048 + 4096 + 8192 = 15360) */ - if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16 - && !buf_pool.is_shrinking()) { + if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16) { goto func_exit; } @@ -615,10 +598,9 @@ /* The buddy is free: recombine */ buf_buddy_remove_from_free(buddy, i); buddy_is_free: - ut_ad(!buf_pool.contains_zip(buddy)); i++; buf = ut_align_down(buf, BUF_BUDDY_LOW << i); - + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); goto recombine; case BUF_BUDDY_STATE_USED: @@ -655,107 +637,120 @@ buf_buddy_add_to_free(reinterpret_cast(buf), i); } -/** Try to reallocate a block. -@param[in] buf buf_pool block to be reallocated -@param[in] size block size, up to srv_page_size -@return whether the reallocation succeeded */ -bool -buf_buddy_realloc(void* buf, ulint size) -{ - buf_block_t* block = NULL; - ulint i = buf_buddy_get_slot(size); - - mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(i <= BUF_BUDDY_SIZES); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - - if (i < BUF_BUDDY_SIZES) { - /* Try to allocate from the buddy system. */ - block = reinterpret_cast(buf_buddy_alloc_zip(i)); - } - - if (block == NULL) { - /* Try allocating from the buf_pool.free list. */ - block = buf_LRU_get_free_only(); - - if (block == NULL) { - return(false); /* free_list was not enough */ - } +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::shrink(). +@param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page +@param block uncompressed block for storage +@return block +@retval nullptr if the block was consumed */ +ATTRIBUTE_COLD +buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept +{ + ut_ad(bpage->zip.data); + + void *dst= nullptr; + ulint size= page_zip_get_size(&bpage->zip); + ulint i= buf_buddy_get_slot(size); + + ut_ad(buf_pool.will_be_withdrawn(bpage->zip.data, size)); + ut_ad(bpage->can_relocate()); + ut_ad(i <= BUF_BUDDY_SIZES); + ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); + + if (UNIV_LIKELY(i < BUF_BUDDY_SIZES)) + dst= buf_buddy_alloc_zip(i); + + if (!dst) + { + buf_buddy_block_register(block); + dst= buf_buddy_alloc_from(block->page.frame, i); + ut_ad(dst); + block= nullptr; + } + + void *src= bpage->zip.data; + memcpy_aligned(dst, src, size); + bpage->zip.data= static_cast(dst); + buf_pool.buddy_stat[i].relocated++; + + while (i < BUF_BUDDY_SIZES) + { + MEM_UNDEFINED(src, BUF_BUDDY_LOW << i); + /* Try to combine adjacent blocks. 
*/ + buf_buddy_free_t *buddy= reinterpret_cast + (buf_buddy_get(static_cast(src), BUF_BUDDY_LOW << i)); - buf_buddy_block_register(block); - - block = reinterpret_cast( - buf_buddy_alloc_from( - block->page.frame, i, BUF_BUDDY_SIZES)); - } - - buf_pool.buddy_stat[i].used++; - - /* Try to relocate the buddy of buf to the free block. */ - if (buf_buddy_relocate(buf, block, i, true)) { - /* succeeded */ - buf_buddy_free_low(buf, i); - } else { - /* failed */ - buf_buddy_free_low(block, i); - } - - return(true); /* free_list was enough */ -} - -/** Combine all pairs of free buddies. */ -void buf_buddy_condense_free() -{ - mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(buf_pool.is_shrinking()); - - for (ulint i = 0; i < UT_ARR_SIZE(buf_pool.zip_free); ++i) { - buf_buddy_free_t* buf = - UT_LIST_GET_FIRST(buf_pool.zip_free[i]); - - /* seek to withdraw target */ - while (buf != NULL - && !buf_pool.will_be_withdrawn( - reinterpret_cast(buf))) { - buf = UT_LIST_GET_NEXT(list, buf); - } - - while (buf != NULL) { - buf_buddy_free_t* next = - UT_LIST_GET_NEXT(list, buf); - - buf_buddy_free_t* buddy = - reinterpret_cast( - buf_buddy_get( - reinterpret_cast(buf), - BUF_BUDDY_LOW << i)); - - /* seek to the next withdraw target */ - while (true) { - while (next != NULL - && !buf_pool.will_be_withdrawn( - reinterpret_cast(next))) { - next = UT_LIST_GET_NEXT(list, next); - } - - if (buddy != next) { - break; - } - - next = UT_LIST_GET_NEXT(list, next); - } - - if (buf_buddy_is_free(buddy, i) - == BUF_BUDDY_STATE_FREE) { - /* Both buf and buddy are free. - Try to combine them. */ - buf_buddy_remove_from_free(buf, i); - buf_pool.buddy_stat[i].used++; + if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE) + { + ut_ad(!buf_pool.contains_zip(src, BUF_BUDDY_LOW_SHIFT + i)); + buf_buddy_add_to_free(static_cast(src), i); + return block; + } + + /* The buddy is free: recombine */ + buf_buddy_remove_from_free(buddy, i); + i++; + src= ut_align_down(src, BUF_BUDDY_LOW << i); + } + + buf_buddy_block_free(src); + return block; +} + +/** Combine all pairs of free buddies. 
+@param size the target innodb_buffer_pool_size */ +ATTRIBUTE_COLD void buf_buddy_condense_free(size_t size) noexcept +{ + ut_ad(size); + ut_ad(size == buf_pool.shrinking_size()); + + for (ulint i= 0; i < array_elements(buf_pool.zip_free); i++) + { + buf_buddy_free_t *buf= UT_LIST_GET_FIRST(buf_pool.zip_free[i]); + + /* seek to withdraw target */ + while (buf && + !buf_pool.will_be_withdrawn(reinterpret_cast(buf), size)) + buf= UT_LIST_GET_NEXT(list, buf); - buf_buddy_free_low(buf, i); - } + for (buf_buddy_free_t *next= buf; buf; buf= next) + { + buf_buddy_free_t *buddy= reinterpret_cast + (buf_buddy_get(reinterpret_cast(buf), BUF_BUDDY_LOW << i)); - buf = next; - } - } + /* seek to the next withdraw target */ + do + { + while ((next= UT_LIST_GET_NEXT(list, next)) && + !buf_pool.will_be_withdrawn(reinterpret_cast(next), + size)) {} + } + while (buddy == next); + + if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE) + continue; + + buf_buddy_remove_from_free(buf, i); + ulint j= i; + recombine: + buf_buddy_remove_from_free(buddy, j); + j++; + buf= static_cast + (ut_align_down(buf, BUF_BUDDY_LOW << j)); + MEM_UNDEFINED(buf, BUF_BUDDY_LOW << j); + + if (j == BUF_BUDDY_SIZES) + { + buf_buddy_block_free(buf); + continue; + } + + buddy= reinterpret_cast + (buf_buddy_get(reinterpret_cast(buf), BUF_BUDDY_LOW << j)); + if (buf_buddy_is_free(buddy, j) == BUF_BUDDY_STATE_FREE) + goto recombine; + + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + j)); + buf_buddy_add_to_free(buf, j); + } + } } diff -Nru mariadb-10.11.11/storage/innobase/buf/buf0buf.cc mariadb-10.11.13/storage/innobase/buf/buf0buf.cc --- mariadb-10.11.11/storage/innobase/buf/buf0buf.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/buf/buf0buf.cc 2025-05-19 16:14:25.000000000 +0000 @@ -47,8 +47,6 @@ #include "lock0lock.h" #include "btr0sea.h" #include "ibuf0ibuf.h" -#include "trx0undo.h" -#include "trx0purge.h" #include "log0log.h" #include "dict0stats_bg.h" #include "srv0srv.h" @@ -64,6 +62,7 @@ #include #include #include "log.h" +#include "my_virtual_mem.h" using st_::span; @@ -277,6 +276,56 @@ */ #ifndef UNIV_INNOCHECKSUM +/** Compute the number of page frames needed for buf_block_t, +per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr uint8_t first_page(size_t ps) +{ + return uint8_t(innodb_buffer_pool_extent_size / ps - + innodb_buffer_pool_extent_size / (ps + sizeof(buf_block_t))); +} + +/** Compute the number of bytes needed for buf_block_t, +per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr size_t first_frame(size_t ps) +{ + return first_page(ps) * ps; +} + +/** Compute the number of pages per innodb_buffer_pool_extent_size. 
+@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr uint16_t pages(size_t ps) +{ + return uint16_t(innodb_buffer_pool_extent_size / ps - first_page(ps)); +} + +/** The byte offset of the first page frame in a buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr size_t first_frame_in_extent[]= +{ + first_frame(4096), first_frame(8192), first_frame(16384), + first_frame(32768), first_frame(65536) +}; + +/** The position offset of the first page frame in a buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr uint8_t first_page_in_extent[]= +{ + first_page(4096), first_page(8192), first_page(16384), + first_page(32768), first_page(65536) +}; + +/** Number of pages per buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr size_t pages_in_extent[]= +{ + pages(4096), pages(8192), pages(16384), pages(32768), pages(65536) +}; + # ifdef SUX_LOCK_GENERIC void page_hash_latch::read_lock_wait() noexcept { @@ -326,8 +375,6 @@ /** The InnoDB buffer pool */ buf_pool_t buf_pool; -buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_reg; -buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_ref; #ifdef UNIV_DEBUG /** This is used to insert validation operations in execution @@ -511,16 +558,18 @@ } #ifndef UNIV_INNOCHECKSUM -/** Checks whether the lsn present in the page is lesser than the -peek current lsn. -@param check_lsn lsn to check +/** Check whether a page is newer than the durable LSN. +@param check_lsn whether to check the LSN @param read_buf page frame -@return whether the FIL_PAGE_LSN is invalid */ -static bool buf_page_check_lsn(bool check_lsn, const byte *read_buf) +@return whether the FIL_PAGE_LSN is invalid (ahead of the durable LSN) */ +static bool buf_page_check_lsn(bool check_lsn, const byte *read_buf) noexcept { if (!check_lsn) return false; - lsn_t current_lsn= log_sys.get_lsn(); + /* A page may not be read before it is written, and it may not be + written before the corresponding log has been durably written. + Hence, we refer to the current durable LSN here */ + lsn_t current_lsn= log_sys.get_flushed_lsn(std::memory_order_relaxed); if (UNIV_UNLIKELY(current_lsn == log_sys.FIRST_LSN) && srv_force_recovery == SRV_FORCE_NO_LOG_REDO) return false; @@ -797,6 +846,11 @@ bool setup() { + m_num_fds= 0; + + if (my_use_large_pages) + return false; + static_assert(array_elements(m_fds) == (array_elements(m_triggers) + 1), "insufficient fds"); std::string memcgroup{"/sys/fs/cgroup"}; @@ -809,7 +863,6 @@ cgroup.erase(0, 3); // Remove "0::" memcgroup+= cgroup + "/memory.pressure"; - m_num_fds= 0; for (auto trig= std::begin(m_triggers); trig!= std::end(m_triggers); ++trig) { if ((m_fds[m_num_fds].fd= @@ -958,29 +1011,121 @@ } /** Initialize mem pressure. 
*/ -ATTRIBUTE_COLD void buf_mem_pressure_detect_init() +ATTRIBUTE_COLD static void buf_mem_pressure_detect_init() noexcept { mem_pressure_obj.setup(); } -ATTRIBUTE_COLD void buf_mem_pressure_shutdown() +ATTRIBUTE_COLD void buf_mem_pressure_shutdown() noexcept { mem_pressure_obj.join(); } -#endif /* __linux__ */ +#endif + +#if defined __linux__ || !defined DBUG_OFF +inline void buf_pool_t::garbage_collect() noexcept +{ + mysql_mutex_lock(&mutex); + const size_t old_size{size_in_bytes}, min_size{size_in_bytes_auto_min}; + const size_t reduce_size= + std::max(innodb_buffer_pool_extent_size, + ut_calc_align((old_size - min_size) / 2, + innodb_buffer_pool_extent_size)); + if (old_size < min_size + reduce_size || + first_to_withdraw || old_size != size_in_bytes_requested) + { + mysql_mutex_unlock(&mutex); + sql_print_information("InnoDB: Memory pressure event disregarded;" + " innodb_buffer_pool_size=%zum," + " innodb_buffer_pool_size_min=%zum", + old_size >> 20, min_size >> 20); + return; + } + + size_t size= old_size - reduce_size; + size_t n_blocks_new= get_n_blocks(size); + + ut_ad(UT_LIST_GET_LEN(withdrawn) == 0); + ut_ad(n_blocks_to_withdraw == 0); + + n_blocks_to_withdraw= n_blocks - n_blocks_new; + first_to_withdraw= &get_nth_page(n_blocks_new)->page; + + size_in_bytes_requested= size; + mysql_mutex_unlock(&mutex); + mysql_mutex_lock(&flush_list_mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); +# ifdef BTR_CUR_HASH_ADAPT + bool ahi_disabled= btr_search_disable(); +# endif /* BTR_CUR_HASH_ADAPT */ + time_t start= time(nullptr); + mysql_mutex_lock(&mutex); + + do + { + if (shrink(size)) + { + const size_t old_blocks{n_blocks}; + n_blocks= n_blocks_new; + + size_t s= n_blocks_new / BUF_READ_AHEAD_PORTION; + read_ahead_area= s >= READ_AHEAD_PAGES + ? READ_AHEAD_PAGES + : my_round_up_to_next_power(uint32(s)); + + os_total_large_mem_allocated-= reduce_size; + shrunk(size, reduce_size); + ibuf_max_size_update(srv_change_buffer_max_size); +# ifdef BTR_CUR_HASH_ADAPT + if (ahi_disabled) + btr_search_enable(true); +# endif + mysql_mutex_unlock(&mutex); + sql_print_information("InnoDB: Memory pressure event shrunk" + " innodb_buffer_pool_size=%zum (%zu pages)" + " from %zum (%zu pages)", + size >> 20, n_blocks_new, old_size >> 20, + old_blocks); + ut_d(validate()); + return; + } + } + while (time(nullptr) - start < 15); + + ut_ad(size_in_bytes > size_in_bytes_requested); + n_blocks_to_withdraw= 0; + first_to_withdraw= nullptr; + size_in_bytes_requested= size_in_bytes; + + while (buf_page_t *b= UT_LIST_GET_FIRST(withdrawn)) + { + UT_LIST_REMOVE(withdrawn, b); + UT_LIST_ADD_LAST(free, b); + ut_d(b->in_free_list= true); + ut_ad(b->state() == buf_page_t::NOT_USED); + b->lock.init(); + } + + mysql_mutex_unlock(&mutex); + sql_print_information("InnoDB: Memory pressure event failed to shrink" + " innodb_buffer_pool_size=%zum", old_size); + ut_d(validate()); +} +#endif #if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) -/** Enable buffers to be dumped to core files +/** Enable buffers to be dumped to core files. -A convience function, not called anyhwere directly however +A convenience function, not called anyhwere directly however it is left available for gdb or any debugger to call in the event that you want all of the memory to be dumped to a core file. -Returns number of errors found in madvise calls. 
#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) -/** Enable buffers to be dumped to core files +/** Enable buffers to be dumped to core files. -A convience function, not called anyhwere directly however +A convenience function, not called anywhere directly however it is left available for gdb or any debugger to call in the event that you want all of the memory to be dumped to a core file. -Returns number of errors found in madvise calls. */ +@return number of errors found in madvise() calls */ MY_ATTRIBUTE((used)) -int -buf_madvise_do_dump() +int buf_pool_t::madvise_do_dump() noexcept { int ret= 0; @@ -991,20 +1136,13 @@ MADV_DODUMP); } - mysql_mutex_lock(&buf_pool.mutex); - auto chunk = buf_pool.chunks; - - for (ulint n = buf_pool.n_chunks; n--; chunk++) { - ret+= madvise(chunk->mem, chunk->mem_size(), MADV_DODUMP); - } - - mysql_mutex_unlock(&buf_pool.mutex); + ret+= madvise(buf_pool.memory, buf_pool.size_in_bytes, MADV_DODUMP); return ret; } #endif #ifndef UNIV_DEBUG -static inline byte hex_to_ascii(byte hex_digit) +static inline byte hex_to_ascii(byte hex_digit) noexcept { const int offset= hex_digit <= 9 ? '0' : 'a' - 10; return byte(hex_digit + offset); } @@ -1040,163 +1178,80 @@ #endif } -/** Initialize a buffer page descriptor. -@param[in,out] block buffer page descriptor -@param[in] frame buffer page frame */ -static -void -buf_block_init(buf_block_t* block, byte* frame) +IF_DBUG(,inline) byte *buf_block_t::frame_address() const noexcept { - /* This function should only be executed at database startup or by - buf_pool.resize(). Either way, adaptive hash index must not exist. */ - assert_block_ahi_empty_on_init(block); - - block->page.frame = frame; + static_assert(ut_is_2pow(innodb_buffer_pool_extent_size), ""); - MEM_MAKE_DEFINED(&block->modify_clock, sizeof block->modify_clock); - ut_ad(!block->modify_clock); - MEM_MAKE_DEFINED(&block->page.lock, sizeof block->page.lock); - block->page.lock.init(); - block->page.init(buf_page_t::NOT_USED, page_id_t(~0ULL)); -#ifdef BTR_CUR_HASH_ADAPT - MEM_MAKE_DEFINED(&block->index, sizeof block->index); - ut_ad(!block->index); -#endif /* BTR_CUR_HASH_ADAPT */ - ut_d(block->in_unzip_LRU_list = false); - ut_d(block->in_withdraw_list = false); - - page_zip_des_init(&block->page.zip); - - MEM_MAKE_DEFINED(&block->page.hash, sizeof block->page.hash); - ut_ad(!block->page.hash); + byte *frame_= reinterpret_cast<byte*> + ((reinterpret_cast<size_t>(this) & ~(innodb_buffer_pool_extent_size - 1)) | + first_frame_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]); + ut_ad(reinterpret_cast<const byte*>(this) + sizeof(*this) <= frame_); + frame_+= + (((reinterpret_cast<size_t>(this) & (innodb_buffer_pool_extent_size - 1)) / + sizeof(*this)) << srv_page_size_shift); + return frame_; }
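frame_address() above recovers a page frame from nothing but the descriptor's own address: masking yields the extent base, and the descriptor's slot number within the extent selects a frame past the descriptor area. The following self-contained sketch reproduces that arithmetic and its inverse (as in buf_pool_t::block_from(), shown further below) under assumed constants: 8 MiB extents, 16 KiB pages, and a fictitious 160-byte descriptor; the real code derives these from srv_page_size_shift and the first_frame_in_extent table:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Assumed layout constants, for illustration only.
    static constexpr uintptr_t EXTENT= 8U << 20;  // extent size (power of 2)
    static constexpr uintptr_t PAGE= 16U << 10;   // page size
    static constexpr uintptr_t DESC= 160;         // stand-in for sizeof(buf_block_t)

    // An extent holds N_PAGES descriptors followed by N_PAGES page-aligned
    // frames; FIRST_FRAME is the byte offset of frame 0 within the extent.
    static constexpr uintptr_t N_PAGES= EXTENT / (PAGE + DESC);
    static constexpr uintptr_t FIRST_FRAME=
      (N_PAGES * DESC + PAGE - 1) & ~(PAGE - 1);

    // Analogue of buf_block_t::frame_address(): descriptor -> frame address.
    static uintptr_t frame_address(uintptr_t desc)
    {
      uintptr_t extent_base= desc & ~(EXTENT - 1);
      uintptr_t slot= (desc & (EXTENT - 1)) / DESC;  // descriptor index
      return (extent_base | FIRST_FRAME) + slot * PAGE;
    }

    // Analogue of buf_pool_t::block_from(): frame address -> descriptor.
    static uintptr_t block_from(uintptr_t frame)
    {
      uintptr_t extent_base= frame & ~(EXTENT - 1);
      uintptr_t slot= ((frame & (EXTENT - 1)) - FIRST_FRAME) / PAGE;
      return extent_base + slot * DESC;
    }

    int main()
    {
      uintptr_t desc= EXTENT * 42 + 3 * DESC;  // 4th descriptor of some extent
      assert(block_from(frame_address(desc)) == desc);  // the two maps invert
      std::printf("frame offset in extent: 0x%zx\n",
                  (size_t) (frame_address(desc) & (EXTENT - 1)));
      return 0;
    }

Because both directions are pure address arithmetic, neither needs the former chunk_t::map registry that this change deletes.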
-/** Allocate a chunk of buffer frames. -@param bytes requested size -@return whether the allocation succeeded */ -inline bool buf_pool_t::chunk_t::create(size_t bytes) noexcept +buf_block_t *buf_pool_t::block_from(const void *ptr) noexcept { - DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return false;); - /* Round down to a multiple of page size, although it already should be. */ - bytes= ut_2pow_round(bytes, srv_page_size); - - mem= buf_pool.allocator.allocate_large_dontdump(bytes, &mem_pfx); - - if (UNIV_UNLIKELY(!mem)) - return false; - - MEM_UNDEFINED(mem, mem_size()); - -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) - { - struct bitmask *numa_mems_allowed= numa_get_mems_allowed(); - MEM_MAKE_DEFINED(numa_mems_allowed, sizeof *numa_mems_allowed); - if (mbind(mem, mem_size(), MPOL_INTERLEAVE, - numa_mems_allowed->maskp, numa_mems_allowed->size, - MPOL_MF_MOVE)) - { - ib::warn() << "Failed to set NUMA memory policy of" - " buffer pool page frames to MPOL_INTERLEAVE" - " (error: " << strerror(errno) << ")."; - } - numa_bitmask_free(numa_mems_allowed); - } -#endif /* HAVE_LIBNUMA */ - - - /* Allocate the block descriptors from - the start of the memory block. */ - blocks= reinterpret_cast<buf_block_t*>(mem); - - /* Align a pointer to the first frame. Note that when - opt_large_page_size is smaller than srv_page_size, - (with max srv_page_size at 64k don't think any hardware - makes this true), - we may allocate one fewer block than requested. When - it is bigger, we may allocate more blocks than requested. */ - static_assert(sizeof(byte*) == sizeof(ulint), "pointer size"); - - byte *frame= reinterpret_cast<byte*>((reinterpret_cast<ulint>(mem) + - srv_page_size - 1) & - ~ulint{srv_page_size - 1}); - size= (mem_pfx.m_size >> srv_page_size_shift) - (frame != mem); - - /* Subtract the space needed for block descriptors. */ - { - ulint s= size; - - while (frame < reinterpret_cast<const byte*>(blocks + s)) - { - frame+= srv_page_size; - s--; - } - - size= s; - } - - /* Init block structs and assign frames for them. Then we assign the - frames to the first blocks (we already mapped the memory above). */ - - buf_block_t *block= blocks; + static_assert(ut_is_2pow(innodb_buffer_pool_extent_size), ""); + ut_ad(static_cast<const char*>(ptr) >= buf_pool.memory); - for (auto i= size; i--; ) { - buf_block_init(block, frame); - MEM_UNDEFINED(block->page.frame, srv_page_size); - /* Add the block to the free list */ - UT_LIST_ADD_LAST(buf_pool.free, &block->page); + byte *first_block= reinterpret_cast<byte*> + (reinterpret_cast<size_t>(ptr) & ~(innodb_buffer_pool_extent_size - 1)); + const size_t first_frame= + first_frame_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; - ut_d(block->page.in_free_list = TRUE); - block++; - frame+= srv_page_size; - } - - reg(); - - return true; + ut_ad(static_cast<const byte*>(ptr) >= first_block + first_frame); + return reinterpret_cast<buf_block_t*>(first_block) + + (((size_t(ptr) & (innodb_buffer_pool_extent_size - 1)) - first_frame) >> + srv_page_size_shift); } -#ifdef UNIV_DEBUG -/** Check that all file pages in the buffer chunk are in a replaceable state. -@return address of a non-free block -@retval nullptr if all freed */ -inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const noexcept +/** Determine the address of the first invalid block descriptor +@param n_blocks buf_pool.n_blocks +@return offset of the first invalid buf_block_t, relative to buf_pool.memory */ +static size_t block_descriptors_in_bytes(size_t n_blocks) noexcept { - buf_block_t *block= blocks; - for (auto i= size; i--; block++) - { - if (block->page.in_file()) - { - /* The uncompressed buffer pool should never - contain ROW_FORMAT=COMPRESSED block descriptors. */ - ut_ad(block->page.frame); - const lsn_t lsn= block->page.oldest_modification(); + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + const size_t extent_size= pages_in_extent[ssize]; + return n_blocks / extent_size * innodb_buffer_pool_extent_size + + (n_blocks % extent_size) * sizeof(buf_block_t); } - if (srv_read_only_mode) - { - /* The page cleaner is disabled in read-only mode. No pages - can be dirtied, so all of them must be clean.
*/ - ut_ad(lsn == 0 || lsn == recv_sys.lsn || - srv_force_recovery == SRV_FORCE_NO_LOG_REDO); - break; - } +buf_block_t *buf_pool_t::get_nth_page(size_t pos) const noexcept +{ + mysql_mutex_assert_owner(&mutex); + ut_ad(pos < n_blocks); + return reinterpret_cast + (memory + block_descriptors_in_bytes(pos)); +} - if (fsp_is_system_temporary(block->page.id().space())) - { - ut_ad(lsn == 0 || lsn == 2); - break; - } +buf_block_t *buf_pool_t::allocate() noexcept +{ + mysql_mutex_assert_owner(&mutex); - if (lsn > 1 || !block->page.can_relocate()) - return block; + while (buf_page_t *b= UT_LIST_GET_FIRST(free)) + { + ut_ad(b->in_free_list); + ut_d(b->in_free_list = FALSE); + ut_ad(!b->oldest_modification()); + ut_ad(!b->in_LRU_list); + ut_a(!b->in_file()); + UT_LIST_REMOVE(free, b); - break; + if (UNIV_LIKELY(!n_blocks_to_withdraw) || !withdraw(*b)) + { + /* No adaptive hash index entries may point to a free block. */ + assert_block_ahi_empty(reinterpret_cast(b)); + b->set_state(buf_page_t::MEMORY); + b->set_os_used(); + return reinterpret_cast(b); } } return nullptr; } -#endif /* UNIV_DEBUG */ /** Create the hash table. @param n the lower bound of n_cells */ @@ -1210,96 +1265,189 @@ array= static_cast(v); } +size_t buf_pool_t::get_n_blocks(size_t size_in_bytes) noexcept +{ + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + size_t n_blocks_alloc= size_in_bytes / innodb_buffer_pool_extent_size * + pages_in_extent[ssize]; + + if (const size_t incomplete_extent_pages= + (size_in_bytes & (innodb_buffer_pool_extent_size - 1)) >> + srv_page_size_shift) + { + ssize_t d= incomplete_extent_pages - first_page_in_extent[ssize]; + ut_ad(d > 0); + n_blocks_alloc+= d; + } + + return n_blocks_alloc; +} + +size_t buf_pool_t::blocks_in_bytes(size_t n_blocks) noexcept +{ + const size_t shift{srv_page_size_shift}; + const size_t ssize{shift - UNIV_PAGE_SIZE_SHIFT_MIN}; + const size_t extent_size= pages_in_extent[ssize]; + size_t size_in_bytes= n_blocks / extent_size * + innodb_buffer_pool_extent_size; + if (size_t remainder= n_blocks % extent_size) + size_in_bytes+= (remainder + first_page_in_extent[ssize]) << shift; + ut_ad(get_n_blocks(size_in_bytes) == n_blocks); + return size_in_bytes; +} + /** Create the buffer pool. 
@return whether the creation failed */ -bool buf_pool_t::create() +bool buf_pool_t::create() noexcept { ut_ad(this == &buf_pool); - ut_ad(srv_buf_pool_size % srv_buf_pool_chunk_unit == 0); ut_ad(!is_initialised()); - ut_ad(srv_buf_pool_size > 0); - ut_ad(!resizing); - ut_ad(!chunks_old); + ut_ad(size_in_bytes_requested > 0); + ut_ad(!(size_in_bytes_max & (innodb_buffer_pool_extent_size - 1))); + ut_ad(!(size_in_bytes_requested & ((1U << 20) - 1))); + ut_ad(size_in_bytes_requested <= size_in_bytes_max); /* mariabackup loads tablespaces, and it requires field_ref_zero to be allocated before innodb initialization */ ut_ad(srv_operation >= SRV_OPERATION_RESTORE || !field_ref_zero); - NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - - if (!field_ref_zero) { + if (!field_ref_zero) + { if (auto b= aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) + { field_ref_zero= static_cast (memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX)); - else - return true; + goto init; + } + + oom: + ut_ad(!is_initialised()); + sql_print_error("InnoDB: Cannot map innodb_buffer_pool_size_max=%zum", + size_in_bytes_max >> 20); + return true; + } + + init: + DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", goto oom;); + size_t size= size_in_bytes_max; + sql_print_information("InnoDB: innodb_buffer_pool_size_max=%zum," + " innodb_buffer_pool_size=%zum", + size >> 20, size_in_bytes_requested >> 20); + + retry: + { + NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; +#ifdef _WIN32 + memory_unaligned= my_virtual_mem_reserve(&size); +#else + memory_unaligned= my_large_virtual_alloc(&size); +#endif } - chunk_t::map_reg= UT_NEW_NOKEY(chunk_t::map()); + if (!memory_unaligned) + goto oom; - new(&allocator) ut_allocator(mem_key_buf_buf_pool); + const size_t alignment_waste= + ((~size_t(memory_unaligned) & (innodb_buffer_pool_extent_size - 1)) + 1) & + (innodb_buffer_pool_extent_size - 1); - n_chunks= srv_buf_pool_size / srv_buf_pool_chunk_unit; - const size_t chunk_size= srv_buf_pool_chunk_unit; + if (size < size_in_bytes_max + alignment_waste) + { + my_virtual_mem_release(memory_unaligned, size); + size+= 1 + + (~size_t(memory_unaligned) & (innodb_buffer_pool_extent_size - 1)); + goto retry; + } - chunks= static_cast(ut_zalloc_nokey(n_chunks * sizeof *chunks)); - UT_LIST_INIT(free, &buf_page_t::list); - curr_size= 0; - auto chunk= chunks; + MEM_UNDEFINED(memory_unaligned, size); + ut_dontdump(memory_unaligned, size, true); + memory= memory_unaligned + alignment_waste; + size_unaligned= size; + size-= alignment_waste; + size&= ~(innodb_buffer_pool_extent_size - 1); - do + const size_t actual_size= size_in_bytes_requested; + ut_ad(actual_size <= size); + + size_in_bytes= actual_size; + os_total_large_mem_allocated+= actual_size; + +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, actual_size, &owner); +#endif +#ifdef _WIN32 + if (!my_virtual_mem_commit(memory, actual_size)) { - if (!chunk->create(chunk_size)) - { - while (--chunk >= chunks) - { - buf_block_t* block= chunk->blocks; + my_virtual_mem_release(memory_unaligned, size_unaligned); + memory= nullptr; + memory_unaligned= nullptr; + goto oom; + } +#else + update_malloc_size(actual_size, 0); +#endif - for (auto i= chunk->size; i--; block++) - block->page.lock.free(); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) + { + struct bitmask *numa_mems_allowed= numa_get_mems_allowed(); + MEM_MAKE_DEFINED(numa_mems_allowed, sizeof *numa_mems_allowed); + if (mbind(memory_unaligned, size_unaligned, MPOL_INTERLEAVE, + numa_mems_allowed->maskp, numa_mems_allowed->size, + MPOL_MF_MOVE)) + 
sql_print_warning("InnoDB: Failed to set NUMA memory policy of" + " buffer pool page frames to MPOL_INTERLEAVE" + " (error: %s).", strerror(errno)); + numa_bitmask_free(numa_mems_allowed); + } +#endif /* HAVE_LIBNUMA */ - allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); - } - ut_free(chunks); - chunks= nullptr; - UT_DELETE(chunk_t::map_reg); - chunk_t::map_reg= nullptr; - aligned_free(const_cast(field_ref_zero)); - field_ref_zero= nullptr; - ut_ad(!is_initialised()); - return true; - } + n_blocks= get_n_blocks(actual_size); + n_blocks_to_withdraw= 0; + UT_LIST_INIT(free, &buf_page_t::list); + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; - curr_size+= chunk->size; + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + { + buf_block_t *block= reinterpret_cast(extent); + const buf_block_t *extent_end= block + pages_in_extent[ssize]; + if (reinterpret_cast(extent_end) > end) + extent_end= reinterpret_cast(end); + MEM_MAKE_DEFINED(block, (extent_end - block) * sizeof *block); + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize]; + block < extent_end; block++, frame+= srv_page_size) + { + ut_ad(!memcmp(block, field_ref_zero, sizeof *block)); + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } } - while (++chunk < chunks + n_chunks); - ut_ad(is_initialised()); #if defined(__aarch64__) mysql_mutex_init(buf_pool_mutex_key, &mutex, MY_MUTEX_INIT_FAST); #else mysql_mutex_init(buf_pool_mutex_key, &mutex, nullptr); #endif + UT_LIST_INIT(withdrawn, &buf_page_t::list); UT_LIST_INIT(LRU, &buf_page_t::LRU); - UT_LIST_INIT(withdraw, &buf_page_t::list); - withdraw_target= 0; UT_LIST_INIT(flush_list, &buf_page_t::list); UT_LIST_INIT(unzip_LRU, &buf_block_t::unzip_LRU); for (size_t i= 0; i < UT_ARR_SIZE(zip_free); ++i) UT_LIST_INIT(zip_free[i], &buf_buddy_free_t::list); - ulint s= curr_size; + ulint s= n_blocks; s/= BUF_READ_AHEAD_PORTION; read_ahead_area= s >= READ_AHEAD_PAGES ? 
READ_AHEAD_PAGES : my_round_up_to_next_power(static_cast(s)); - curr_pool_size= srv_buf_pool_size; - n_chunks_new= n_chunks; - - page_hash.create(2 * curr_size); - zip_hash.create(2 * curr_size); - last_printout_time= time(NULL); + page_hash.create(2 * n_blocks); + last_printout_time= time(nullptr); mysql_mutex_init(flush_list_mutex_key, &flush_list_mutex, MY_MUTEX_INIT_FAST); @@ -1318,14 +1466,8 @@ io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) * OS_AIO_N_PENDING_IOS_PER_THREAD); - /* FIXME: remove some of these variables */ - srv_buf_pool_curr_size= curr_pool_size; - srv_buf_pool_old_size= srv_buf_pool_size; - srv_buf_pool_base_size= srv_buf_pool_size; - last_activity_count= srv_get_activity_count(); - chunk_t::map_ref= chunk_t::map_reg; buf_LRU_old_ratio_update(100 * 3 / 8, false); btr_search_sys_create(); @@ -1334,6 +1476,7 @@ buf_mem_pressure_detect_init(); #endif ut_ad(is_initialised()); + sql_print_information("InnoDB: Completed initialization of buffer pool"); return false; } @@ -1368,14 +1511,31 @@ } } - for (auto chunk= chunks + n_chunks; --chunk >= chunks; ) { - buf_block_t *block= chunk->blocks; + const size_t size{size_in_bytes}; - for (auto i= chunk->size; i--; block++) - block->page.lock.free(); + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + MEM_MAKE_DEFINED(&block->page.lock, sizeof &block->page.lock); + block->page.lock.free(); + } - allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); + ut_dodump(memory_unaligned, size_unaligned); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_free)(mem_key_buf_buf_pool, size, owner); + owner= nullptr; +#endif + os_total_large_mem_allocated-= size; + my_virtual_mem_decommit(memory, size); + my_virtual_mem_release(memory_unaligned, size_unaligned); + memory= nullptr; + memory_unaligned= nullptr; } pthread_cond_destroy(&done_flush_LRU); @@ -1383,137 +1543,13 @@ pthread_cond_destroy(&do_flush_list); pthread_cond_destroy(&done_free); - ut_free(chunks); - chunks= nullptr; page_hash.free(); - zip_hash.free(); io_buf.close(); - UT_DELETE(chunk_t::map_reg); - chunk_t::map_reg= chunk_t::map_ref= nullptr; aligned_free(const_cast(field_ref_zero)); field_ref_zero= nullptr; } -/** Try to reallocate a control block. -@param block control block to reallocate -@return whether the reallocation succeeded */ -inline bool buf_pool_t::realloc(buf_block_t *block) noexcept -{ - buf_block_t* new_block; - - mysql_mutex_assert_owner(&mutex); - ut_ad(block->page.in_file()); - ut_ad(block->page.frame); - - new_block = buf_LRU_get_free_only(); - - if (new_block == NULL) { - mysql_mutex_lock(&buf_pool.flush_list_mutex); - page_cleaner_wakeup(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - return(false); /* free list was not enough */ - } - - const page_id_t id{block->page.id()}; - hash_chain& chain = page_hash.cell_get(id.fold()); - page_hash_latch& hash_lock = page_hash.lock_get(chain); - /* It does not make sense to use transactional_lock_guard - here, because copying innodb_page_size (4096 to 65536) bytes - as well as other changes would likely make the memory - transaction too large. 
*/ - hash_lock.lock(); - - if (block->page.can_relocate()) { - memcpy_aligned( - new_block->page.frame, block->page.frame, - srv_page_size); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - const auto frame = new_block->page.frame; - new_block->page.lock.free(); - new (&new_block->page) buf_page_t(block->page); - new_block->page.frame = frame; - - /* relocate LRU list */ - if (buf_page_t* prev_b = buf_pool.LRU_remove(&block->page)) { - UT_LIST_INSERT_AFTER(LRU, prev_b, &new_block->page); - } else { - UT_LIST_ADD_FIRST(LRU, &new_block->page); - } - - if (LRU_old == &block->page) { - LRU_old = &new_block->page; - } - - ut_ad(new_block->page.in_LRU_list); - - /* relocate unzip_LRU list */ - if (block->page.zip.data != NULL) { - ut_ad(block->in_unzip_LRU_list); - ut_d(new_block->in_unzip_LRU_list = true); - - buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block); - UT_LIST_REMOVE(unzip_LRU, block); - - ut_d(block->in_unzip_LRU_list = false); - block->page.zip.data = NULL; - page_zip_set_size(&block->page.zip, 0); - - if (prev_block != NULL) { - UT_LIST_INSERT_AFTER(unzip_LRU, prev_block, new_block); - } else { - UT_LIST_ADD_FIRST(unzip_LRU, new_block); - } - } else { - ut_ad(!block->in_unzip_LRU_list); - ut_d(new_block->in_unzip_LRU_list = false); - } - - /* relocate page_hash */ - hash_chain& chain = page_hash.cell_get(id.fold()); - ut_ad(&block->page == page_hash.get(id, chain)); - buf_pool.page_hash.replace(chain, &block->page, - &new_block->page); - buf_block_modify_clock_inc(block); - static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - memset_aligned<4>(block->page.frame - + FIL_PAGE_OFFSET, 0xff, 4); - static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, - "not perfect alignment"); - memset_aligned<2>(block->page.frame - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); - MEM_UNDEFINED(block->page.frame, srv_page_size); - block->page.set_state(buf_page_t::REMOVE_HASH); - if (!fsp_is_system_temporary(id.space())) { - buf_flush_relocate_on_flush_list(&block->page, - &new_block->page); - } - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - block->page.set_corrupt_id(); - - /* set other flags of buf_block_t */ - -#ifdef BTR_CUR_HASH_ADAPT - /* This code should only be executed by resize(), - while the adaptive hash index is disabled. */ - assert_block_ahi_empty(block); - assert_block_ahi_empty_on_init(new_block); - ut_ad(!block->index); - new_block->index = NULL; - new_block->n_hash_helps = 0; - new_block->n_fields = 1; - new_block->left_side = TRUE; -#endif /* BTR_CUR_HASH_ADAPT */ - ut_d(block->page.set_state(buf_page_t::MEMORY)); - /* free block */ - new_block = block; - } - - hash_lock.unlock(); - buf_LRU_block_free_non_file_page(new_block); - return(true); /* free_list was enough */ -} - void buf_pool_t::io_buf_t::create(ulint n_slots) noexcept { this->n_slots= n_slots; @@ -1552,720 +1588,528 @@ } } -/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status -to the specified string. The format and the following parameters are the -same as the ones used for printf(3). -@param[in] fmt format -@param[in] ... extra parameters according to fmt */ -static -void -buf_resize_status( - const char* fmt, - ...) 
+ATTRIBUTE_COLD bool buf_pool_t::withdraw(buf_page_t &bpage) noexcept { - va_list ap; - - va_start(ap, fmt); - - vsnprintf( - export_vars.innodb_buffer_pool_resize_status, - sizeof(export_vars.innodb_buffer_pool_resize_status), - fmt, ap); - - va_end(ap); - - ib::info() << export_vars.innodb_buffer_pool_resize_status; + mysql_mutex_assert_owner(&mutex); + ut_ad(n_blocks_to_withdraw); + ut_ad(first_to_withdraw); + ut_ad(!bpage.zip.data); + if (&bpage < first_to_withdraw) + return false; + n_blocks_to_withdraw--; + bpage.lock.free(); + UT_LIST_ADD_LAST(withdrawn, &bpage); + return true; } -/** Withdraw blocks from the buffer pool until meeting withdraw_target. -@return whether retry is needed */ -inline bool buf_pool_t::withdraw_blocks() noexcept +ATTRIBUTE_COLD buf_pool_t::shrink_status buf_pool_t::shrink(size_t size) + noexcept { - buf_block_t* block; - ulint loop_count = 0; - - ib::info() << "Start to withdraw the last " - << withdraw_target << " blocks."; - - while (UT_LIST_GET_LEN(withdraw) < withdraw_target) { - - /* try to withdraw from free_list */ - ulint count1 = 0; + mysql_mutex_assert_owner(&mutex); + buf_load_abort(); - mysql_mutex_lock(&mutex); - buf_buddy_condense_free(); - block = reinterpret_cast( - UT_LIST_GET_FIRST(free)); - while (block != NULL - && UT_LIST_GET_LEN(withdraw) < withdraw_target) { - ut_ad(block->page.in_free_list); - ut_ad(!block->page.oldest_modification()); - ut_ad(!block->page.in_LRU_list); - ut_a(!block->page.in_file()); - - buf_block_t* next_block; - next_block = reinterpret_cast( - UT_LIST_GET_NEXT( - list, &block->page)); - - if (will_be_withdrawn(block->page)) { - /* This should be withdrawn */ - UT_LIST_REMOVE(free, &block->page); - UT_LIST_ADD_LAST(withdraw, &block->page); - ut_d(block->in_withdraw_list = true); - count1++; - } - - block = next_block; - } - - /* reserve free_list length */ - if (UT_LIST_GET_LEN(withdraw) < withdraw_target) { - try_LRU_scan = false; - mysql_mutex_unlock(&mutex); - mysql_mutex_lock(&flush_list_mutex); - page_cleaner_wakeup(true); - my_cond_wait(&done_flush_list, - &flush_list_mutex.m_mutex); - mysql_mutex_unlock(&flush_list_mutex); - mysql_mutex_lock(&mutex); - } - - /* relocate blocks/buddies in withdrawn area */ - ulint count2 = 0; - - buf_pool_mutex_exit_forbid(); - for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage; - bpage; bpage = next_bpage) { - ut_ad(bpage->in_file()); - next_bpage = UT_LIST_GET_NEXT(LRU, bpage); - if (UNIV_LIKELY_NULL(bpage->zip.data) - && will_be_withdrawn(bpage->zip.data) - && bpage->can_relocate()) { - if (!buf_buddy_realloc( - bpage->zip.data, - page_zip_get_size(&bpage->zip))) { - /* failed to allocate block */ - break; - } - count2++; - if (bpage->frame) { - goto realloc_frame; - } - } - - if (bpage->frame && will_be_withdrawn(*bpage) - && bpage->can_relocate()) { -realloc_frame: - if (!realloc(reinterpret_cast( - bpage))) { - /* failed to allocate block */ - break; - } - count2++; - } - } - buf_pool_mutex_exit_allow(); - mysql_mutex_unlock(&mutex); - - buf_resize_status( - "Withdrawing blocks. (" ULINTPF "/" ULINTPF ").", - UT_LIST_GET_LEN(withdraw), - withdraw_target); - - ib::info() << "Withdrew " - << count1 << " blocks from free list." - << " Tried to relocate " << count2 << " blocks (" - << UT_LIST_GET_LEN(withdraw) << "/" - << withdraw_target << ")."; - - if (++loop_count >= 10) { - /* give up for now. - retried after user threads paused. 
*/ - - ib::info() << "will retry to withdraw later"; - - /* need retry later */ - return(true); - } - } - - /* confirm withdrawn enough */ - for (const chunk_t* chunk = chunks + n_chunks_new, - * const echunk = chunks + n_chunks; chunk != echunk; chunk++) { - block = chunk->blocks; - for (ulint j = chunk->size; j--; block++) { - ut_a(block->page.state() == buf_page_t::NOT_USED); - ut_ad(block->in_withdraw_list); - } - } - - ib::info() << "Withdrawn target: " << UT_LIST_GET_LEN(withdraw) - << " blocks."; - - return(false); -} - - - -inline void buf_pool_t::page_hash_table::write_lock_all() noexcept -{ - for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) + if (!n_blocks_to_withdraw) { - reinterpret_cast(array[n]).lock(); - if (!n) - break; + withdraw_done: + first_to_withdraw= nullptr; + while (buf_page_t *b= UT_LIST_GET_FIRST(withdrawn)) + { + UT_LIST_REMOVE(withdrawn, b); + /* satisfy the check in lazy_allocate() */ + ut_d(memset((void*) b, 0, sizeof(buf_block_t))); + } + return SHRINK_DONE; } -} + buf_buddy_condense_free(size); -inline void buf_pool_t::page_hash_table::write_unlock_all() noexcept -{ - for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) + for (buf_page_t *b= UT_LIST_GET_FIRST(free), *next; b; b= next) { - reinterpret_cast(array[n]).unlock(); - if (!n) - break; - } -} + ut_ad(b->in_free_list); + ut_ad(!b->in_LRU_list); + ut_ad(!b->zip.data); + ut_ad(!b->oldest_modification()); + ut_a(b->state() == buf_page_t::NOT_USED); + next= UT_LIST_GET_NEXT(list, b); -namespace -{ - -struct find_interesting_trx -{ - void operator()(const trx_t &trx) - { - if (!trx.is_started()) - return; - if (trx.mysql_thd == nullptr) - return; - if (withdraw_started <= trx.start_time_micro) - return; - - if (!found) + if (b >= first_to_withdraw) { - sql_print_warning("InnoDB: The following trx might hold " - "the blocks in buffer pool to " - "be withdrawn. Buffer pool " - "resizing can complete only " - "after all the transactions " - "below release the blocks."); - found= true; + UT_LIST_REMOVE(free, b); + b->lock.free(); + UT_LIST_ADD_LAST(withdrawn, b); + if (!--n_blocks_to_withdraw) + goto withdraw_done; } - - lock_trx_print_wait_and_mvcc_state(stderr, &trx, current_time); } - bool &found; - /** microsecond_interval_timer() */ - const ulonglong withdraw_started; - const my_hrtime_t current_time; -}; - -} // namespace - -/** Resize from srv_buf_pool_old_size to srv_buf_pool_size. 
*/ -inline void buf_pool_t::resize() -{ - ut_ad(this == &buf_pool); - ut_ad(srv_shutdown_state < SRV_SHUTDOWN_CLEANUP); - - bool warning = false; - - NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - - ut_ad(!resize_in_progress()); - ut_ad(srv_buf_pool_chunk_unit > 0); - - ulint new_instance_size = srv_buf_pool_size >> srv_page_size_shift; - std::ostringstream str_old_size, str_new_size, str_chunk_size; - str_old_size << ib::bytes_iec{srv_buf_pool_old_size}; - str_new_size << ib::bytes_iec{srv_buf_pool_size}; - str_chunk_size << ib::bytes_iec{srv_buf_pool_chunk_unit}; + buf_block_t *block= allocate(); + size_t scanned= 0; + for (buf_page_t *b= lru_scan_itr.start(), *prev; block && b; b= prev) + { + ut_ad(b->in_LRU_list); + ut_a(b->in_file()); - buf_resize_status("Resizing buffer pool from %s to %s (unit = %s).", - str_old_size.str().c_str(), - str_new_size.str().c_str(), - str_chunk_size.str().c_str()); + prev= UT_LIST_GET_PREV(LRU, b); -#ifdef BTR_CUR_HASH_ADAPT - /* disable AHI if needed */ - buf_resize_status("Disabling adaptive hash index."); + if (!b->can_relocate()) + { + next: + if (++scanned & 31) + continue; + /* Avoid starvation by periodically releasing buf_pool.mutex. */ + lru_scan_itr.set(prev); + mysql_mutex_unlock(&mutex); + mysql_mutex_lock(&mutex); + prev= lru_scan_itr.get(); + continue; + } - btr_search_s_lock_all(); - const bool btr_search_disabled = btr_search_enabled; - btr_search_s_unlock_all(); + const page_id_t id{b->id()}; + hash_chain &chain= page_hash.cell_get(id.fold()); + page_hash_latch &hash_lock= page_hash.lock_get(chain); + hash_lock.lock(); - btr_search_disable(); + { + /* relocate flush_list and b->page.zip */ + bool have_flush_list_mutex= false; - if (btr_search_disabled) { - ib::info() << "disabled adaptive hash index."; - } -#endif /* BTR_CUR_HASH_ADAPT */ + switch (b->oldest_modification()) { + case 2: + ut_ad(fsp_is_system_temporary(id.space())); + /* fall through */ + case 0: + break; + default: + mysql_mutex_lock(&flush_list_mutex); + switch (ut_d(lsn_t om=) b->oldest_modification()) { + case 1: + delete_from_flush_list(b); + /* fall through */ + case 0: + mysql_mutex_unlock(&flush_list_mutex); + break; + default: + ut_ad(om != 2); + have_flush_list_mutex= true; + } + } - mysql_mutex_lock(&mutex); - ut_ad(n_chunks_new == n_chunks); - ut_ad(UT_LIST_GET_LEN(withdraw) == 0); + if (!b->can_relocate()) + { + next_quick: + if (have_flush_list_mutex) + mysql_mutex_unlock(&flush_list_mutex); + hash_lock.unlock(); + continue; + } - n_chunks_new = (new_instance_size << srv_page_size_shift) - / srv_buf_pool_chunk_unit; - curr_size = n_chunks_new * chunks->size; - mysql_mutex_unlock(&mutex); + if (UNIV_UNLIKELY(will_be_withdrawn(b->zip.data, size))) + { + block= buf_buddy_shrink(b, block); + ut_ad(mach_read_from_4(b->zip.data + FIL_PAGE_OFFSET) == id.page_no()); + if (UNIV_UNLIKELY(!n_blocks_to_withdraw)) + { + if (have_flush_list_mutex) + mysql_mutex_unlock(&flush_list_mutex); + hash_lock.unlock(); + if (block) + buf_LRU_block_free_non_file_page(block); + goto withdraw_done; + } + if (!block && !(block= allocate())) + goto next_quick; + } - if (is_shrinking()) { - /* set withdraw target */ - size_t w = 0; + if (!b->frame || b < first_to_withdraw) + goto next_quick; - for (const chunk_t* chunk = chunks + n_chunks_new, - * const echunk = chunks + n_chunks; - chunk != echunk; chunk++) - w += chunk->size; + ut_ad(is_uncompressed_current(b)); - ut_ad(withdraw_target == 0); - withdraw_target = w; - } + byte *const frame= block->page.frame; + memcpy_aligned<4096>(frame, 
b->frame, srv_page_size); + b->lock.free(); + block->page.lock.free(); + new(&block->page) buf_page_t(*b); + block->page.frame= frame; - buf_resize_status("Withdrawing blocks to be shrunken."); + if (have_flush_list_mutex) + { + buf_flush_relocate_on_flush_list(b, &block->page); + mysql_mutex_unlock(&flush_list_mutex); + } + } - ulonglong withdraw_started = microsecond_interval_timer(); - ulonglong message_interval = 60ULL * 1000 * 1000; - ulint retry_interval = 1; + /* relocate LRU list */ + if (buf_page_t *prev_b= LRU_remove(b)) + UT_LIST_INSERT_AFTER(LRU, prev_b, &block->page); + else + UT_LIST_ADD_FIRST(LRU, &block->page); -withdraw_retry: - /* wait for the number of blocks fit to the new size (if needed)*/ - bool should_retry_withdraw = is_shrinking() - && withdraw_blocks(); + if (LRU_old == b) + LRU_old= &block->page; - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - /* abort to resize for shutdown. */ - return; - } + ut_ad(block->page.in_LRU_list); - /* abort buffer pool load */ - buf_load_abort(); + /* relocate page_hash */ + ut_ad(b == page_hash.get(id, chain)); + page_hash.replace(chain, b, &block->page); - const ulonglong current_time = microsecond_interval_timer(); + if (b->zip.data) + { + ut_ad(mach_read_from_4(b->zip.data + FIL_PAGE_OFFSET) == id.page_no()); + b->zip.data= nullptr; + /* relocate unzip_LRU list */ + buf_block_t *old_block= reinterpret_cast(b); + ut_ad(old_block->in_unzip_LRU_list); + ut_d(old_block->in_unzip_LRU_list= false); + ut_d(block->in_unzip_LRU_list= true); - if (should_retry_withdraw - && current_time - withdraw_started >= message_interval) { + buf_block_t *prev= UT_LIST_GET_PREV(unzip_LRU, old_block); + UT_LIST_REMOVE(unzip_LRU, old_block); - if (message_interval > 900000000) { - message_interval = 1800000000; - } else { - message_interval *= 2; - } + if (prev) + UT_LIST_INSERT_AFTER(unzip_LRU, prev, block); + else + UT_LIST_ADD_FIRST(unzip_LRU, block); + } - bool found= false; - find_interesting_trx f - {found, withdraw_started, my_hrtime_coarse()}; - withdraw_started = current_time; - - /* This is going to exceed the maximum size of a - memory transaction. 
*/ - LockMutexGuard g{SRW_LOCK_CALL}; - trx_sys.trx_list.for_each(f); - } - - if (should_retry_withdraw) { - ib::info() << "Will retry to withdraw " << retry_interval - << " seconds later."; - std::this_thread::sleep_for( - std::chrono::seconds(retry_interval)); + buf_block_modify_clock_inc(block); - if (retry_interval > 5) { - retry_interval = 10; - } else { - retry_interval *= 2; - } +#ifdef BTR_CUR_HASH_ADAPT + assert_block_ahi_empty_on_init(block); + block->index= nullptr; + block->n_hash_helps= 0; + block->n_fields= 1; + block->left_side= true; +#endif /* BTR_CUR_HASH_ADAPT */ + hash_lock.unlock(); - goto withdraw_retry; - } + ut_d(b->in_LRU_list= false); - buf_resize_status("Latching entire buffer pool."); + b->set_state(buf_page_t::NOT_USED); + UT_LIST_ADD_LAST(withdrawn, b); + if (!--n_blocks_to_withdraw) + goto withdraw_done; -#ifndef DBUG_OFF - { - bool should_wait = true; + block= allocate(); + goto next; + } - while (should_wait) { - should_wait = false; - DBUG_EXECUTE_IF( - "ib_buf_pool_resize_wait_before_resize", - should_wait = true; - std::this_thread::sleep_for( - std::chrono::milliseconds(10));); - } - } -#endif /* !DBUG_OFF */ + if (UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < usable_size() / 20) + return SHRINK_ABORT; - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } + mysql_mutex_lock(&flush_list_mutex); - /* Indicate critical path */ - resizing.store(true, std::memory_order_relaxed); + if (LRU_warned && !UT_LIST_GET_FIRST(free)) + { + LRU_warned_clear(); + mysql_mutex_unlock(&flush_list_mutex); + return SHRINK_ABORT; + } - mysql_mutex_lock(&mutex); - page_hash.write_lock_all(); + try_LRU_scan= false; + mysql_mutex_unlock(&mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); + mysql_mutex_lock(&mutex); - chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map()); + if (!n_blocks_to_withdraw) + goto withdraw_done; - /* add/delete chunks */ + return SHRINK_IN_PROGRESS; +} - buf_resize_status("Resizing buffer pool from " - ULINTPF " chunks to " ULINTPF " chunks.", - n_chunks, n_chunks_new); - - if (is_shrinking()) { - /* delete chunks */ - chunk_t* chunk = chunks + n_chunks_new; - const chunk_t* const echunk = chunks + n_chunks; - - ulint sum_freed = 0; - - while (chunk < echunk) { - /* buf_LRU_block_free_non_file_page() invokes - MEM_NOACCESS() on any buf_pool.free blocks. - We must cancel the effect of that. In - MemorySanitizer, MEM_NOACCESS() is no-op, so - we must not do anything special for it here. */ -#ifdef HAVE_valgrind -# if !__has_feature(memory_sanitizer) - MEM_MAKE_DEFINED(chunk->mem, chunk->mem_size()); +inline void buf_pool_t::shrunk(size_t size, size_t reduced) noexcept +{ + ut_ad(size + reduced == size_in_bytes); + size_in_bytes_requested= size; + size_in_bytes= size; +# ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* Only page_guess() may read this memory, which after + my_virtual_mem_decommit() may be zeroed out or preserve its original + contents. Try to catch any unintended reads outside page_guess(). */ + MEM_UNDEFINED(memory + size, size_in_bytes_max - size); +# else + for (size_t n= page_hash.pad(page_hash.n_cells), i= 0; i < n; + i+= page_hash.ELEMENTS_PER_LATCH + 1) + { + auto &latch= reinterpret_cast(page_hash.array[i]); + latch.lock(); + /* We already shrunk size_in_bytes. The exclusive lock here + ensures that any page_guess() will detect an out-of-bounds + guess before we invoke my_virtual_mem_decommit() below. 
*/ + latch.unlock(); + } # endif -#else - MEM_MAKE_ADDRESSABLE(chunk->mem, chunk->size); + my_virtual_mem_decommit(memory + size, reduced); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_free)(mem_key_buf_buf_pool, reduced, owner); #endif +} - buf_block_t* block = chunk->blocks; - - for (ulint j = chunk->size; j--; block++) { - block->page.lock.free(); - } - - allocator.deallocate_large_dodump( - chunk->mem, &chunk->mem_pfx); - sum_freed += chunk->size; - ++chunk; - } - - /* discard withdraw list */ - UT_LIST_INIT(withdraw, &buf_page_t::list); - withdraw_target = 0; - - ib::info() << n_chunks - n_chunks_new - << " Chunks (" << sum_freed - << " blocks) were freed."; - - n_chunks = n_chunks_new; - } - - { - /* reallocate chunks */ - const size_t new_chunks_size - = n_chunks_new * sizeof(chunk_t); - - chunk_t* new_chunks = static_cast( - ut_zalloc_nokey_nofatal(new_chunks_size)); - - DBUG_EXECUTE_IF("buf_pool_resize_chunk_null", - ut_free(new_chunks); new_chunks= nullptr; ); - - if (!new_chunks) { - ib::error() << "failed to allocate" - " the chunk array."; - n_chunks_new = n_chunks; - warning = true; - chunks_old = NULL; - goto calc_buf_pool_size; - } - - ulint n_chunks_copy = ut_min(n_chunks_new, n_chunks); - - memcpy(new_chunks, chunks, - n_chunks_copy * sizeof *new_chunks); +ATTRIBUTE_COLD void buf_pool_t::resize(size_t size, THD *thd) noexcept +{ + ut_ad(this == &buf_pool); + mysql_mutex_assert_owner(&LOCK_global_system_variables); + ut_ad(size <= size_in_bytes_max); + if (my_use_large_pages) + { + my_error(ER_VARIABLE_IS_READONLY, MYF(0), "InnoDB", + "innodb_buffer_pool_size", "large_pages=0"); + return; + } - for (ulint j = 0; j < n_chunks_copy; j++) { - new_chunks[j].reg(); - } + size_t n_blocks_new= get_n_blocks(size); - chunks_old = chunks; - chunks = new_chunks; - } + mysql_mutex_lock(&mutex); - if (n_chunks_new > n_chunks) { - /* add chunks */ - ulint sum_added = 0; - ulint n = n_chunks; - const size_t unit = srv_buf_pool_chunk_unit; - - for (chunk_t* chunk = chunks + n_chunks, - * const echunk = chunks + n_chunks_new; - chunk != echunk; chunk++) { - if (!chunk->create(unit)) { - ib::error() << "failed to allocate" - " memory for buffer pool chunk"; + const size_t old_size= size_in_bytes; + if (first_to_withdraw || old_size != size_in_bytes_requested) + { + mysql_mutex_unlock(&mutex); + my_printf_error(ER_WRONG_USAGE, + "innodb_buffer_pool_size change is already in progress", + MYF(0)); + return; + } - warning = true; - n_chunks_new = n_chunks; - break; - } + ut_ad(UT_LIST_GET_LEN(withdrawn) == 0); + ut_ad(n_blocks_to_withdraw == 0); +#ifdef __linux__ + DBUG_EXECUTE_IF("trigger_garbage_collection", + mem_pressure_obj.trigger_collection();); +#endif - sum_added += chunk->size; - ++n; - } + if (size == old_size) + { + mysql_mutex_unlock(&mutex); + DBUG_EXECUTE_IF("trigger_garbage_collection", + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + garbage_collect();); + return; + } - ib::info() << n_chunks_new - n_chunks - << " chunks (" << sum_added - << " blocks) were added."; - - n_chunks = n; - } -calc_buf_pool_size: - /* recalc curr_size */ - ulint new_size = 0; +#ifdef BTR_CUR_HASH_ADAPT + bool ahi_disabled= false; +#endif - { - chunk_t* chunk = chunks; - const chunk_t* const echunk = chunk + n_chunks; - do { - new_size += chunk->size; - } while (++chunk != echunk); - } + const bool significant_change= + n_blocks_new > n_blocks * 2 || n_blocks > n_blocks_new * 2; + const ssize_t n_blocks_removed= n_blocks - n_blocks_new; - curr_size = new_size; - n_chunks_new = 
n_chunks; + if (n_blocks_removed <= 0) + { + if (!my_virtual_mem_commit(memory + old_size, size - old_size)) + { + mysql_mutex_unlock(&mutex); + sql_print_error("InnoDB: Cannot commit innodb_buffer_pool_size=%zum;" + " retaining innodb_buffer_pool_size=%zum", + size >> 20, old_size >> 20); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return; + } - if (chunks_old) { - ut_free(chunks_old); - chunks_old = NULL; - } + size_in_bytes_requested= size; + size_in_bytes= size; - chunk_t::map* chunk_map_old = chunk_t::map_ref; - chunk_t::map_ref = chunk_t::map_reg; + { + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + const size_t pages= pages_in_extent[ssize]; + const size_t first_extent= n_blocks / pages; - /* set size */ - ut_ad(UT_LIST_GET_LEN(withdraw) == 0); - ulint s= curr_size; - s/= BUF_READ_AHEAD_PORTION; - read_ahead_area= s >= READ_AHEAD_PAGES - ? READ_AHEAD_PAGES - : my_round_up_to_next_power(static_cast(s)); - curr_pool_size= n_chunks * srv_buf_pool_chunk_unit; - srv_buf_pool_curr_size= curr_pool_size;/* FIXME: remove*/ - extern ulonglong innobase_buffer_pool_size; - innobase_buffer_pool_size= buf_pool_size_align(srv_buf_pool_curr_size); - - const bool new_size_too_diff - = srv_buf_pool_base_size > srv_buf_pool_size * 2 - || srv_buf_pool_base_size * 2 < srv_buf_pool_size; + char *extent= memory + first_extent * innodb_buffer_pool_extent_size; - mysql_mutex_unlock(&mutex); - page_hash.write_unlock_all(); + buf_block_t *block= reinterpret_cast(extent); + if (const size_t first_blocks= n_blocks % pages) + { + /* Extend the last (partial) extent until its end */ + const buf_block_t *extent_end= block + + (first_extent == (n_blocks_new / pages) + ? (n_blocks_new % pages) + : pages); + block+= first_blocks; + memset((void*) block, 0, (extent_end - block) * sizeof *block); + + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize] + + (first_blocks << srv_page_size_shift); block < extent_end; + block++, frame+= srv_page_size) + { + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + extent+= innodb_buffer_pool_extent_size; + } - UT_DELETE(chunk_map_old); + /* Fill in further extents; @see buf_pool_t::create() */ + for (const char *const end_new= memory + + block_descriptors_in_bytes(n_blocks_new); + extent < end_new; extent+= innodb_buffer_pool_extent_size) + { + block= reinterpret_cast(extent); + const buf_block_t *extent_end= block + pages; + if (reinterpret_cast(extent_end) > end_new) + extent_end= reinterpret_cast(end_new); + + memset((void*) block, 0, (extent_end - block) * sizeof *block); + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize]; + block < extent_end; block++, frame+= srv_page_size) + { + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + } + } - resizing.store(false, std::memory_order_relaxed); + mysql_mutex_unlock(&LOCK_global_system_variables); + resized: + ut_ad(UT_LIST_GET_LEN(withdrawn) == 0); + ut_ad(n_blocks_to_withdraw == 0); + ut_ad(!first_to_withdraw); + const size_t old_blocks{n_blocks}; + n_blocks= n_blocks_new; + + size_t s= n_blocks_new / BUF_READ_AHEAD_PORTION; + read_ahead_area= s >= READ_AHEAD_PAGES + ? 
READ_AHEAD_PAGES + : my_round_up_to_next_power(uint32(s)); - /* Normalize other components, if the new size is too different */ - if (!warning && new_size_too_diff) { - srv_buf_pool_base_size = srv_buf_pool_size; + if (ssize_t d= size - old_size) + { + os_total_large_mem_allocated+= d; + if (d > 0) + { + /* Already committed memory earlier */ + ut_ad(n_blocks_removed <= 0); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, d, &owner); +#endif + } + else + shrunk(size, size_t(-d)); + } - buf_resize_status("Resizing other hash tables."); + mysql_mutex_unlock(&mutex); - srv_lock_table_size = 5 - * (srv_buf_pool_size >> srv_page_size_shift); - lock_sys.resize(srv_lock_table_size); - dict_sys.resize(); + if (significant_change) + { + sql_print_information("InnoDB: Resizing hash tables"); + srv_lock_table_size= 5 * n_blocks_new; + lock_sys.resize(srv_lock_table_size); + dict_sys.resize(); + } - ib::info() << "Resized hash tables: lock_sys," + ibuf_max_size_update(srv_change_buffer_max_size); #ifdef BTR_CUR_HASH_ADAPT - " adaptive hash index," -#endif /* BTR_CUR_HASH_ADAPT */ - " and dictionary."; - } - - /* normalize ibuf.max_size */ - ibuf_max_size_update(srv_change_buffer_max_size); - - if (srv_buf_pool_old_size != srv_buf_pool_size) { + if (ahi_disabled) + btr_search_enable(true); +#endif + mysql_mutex_lock(&LOCK_global_system_variables); + bool resized= n_blocks_removed < 0; + if (n_blocks_removed > 0) + { + mysql_mutex_lock(&mutex); + resized= size_in_bytes == old_size; + if (resized) + { + size_in_bytes_requested= size; + size_in_bytes= size; + } + mysql_mutex_unlock(&mutex); + } - buf_resize_status("Completed resizing buffer pool from %zu to %zu bytes." - ,srv_buf_pool_old_size, srv_buf_pool_size); - srv_buf_pool_old_size = srv_buf_pool_size; - } + if (resized) + sql_print_information("InnoDB: innodb_buffer_pool_size=%zum (%zu pages)" + " resized from %zum (%zu pages)", + size >> 20, n_blocks_new, old_size >> 20, + old_blocks); + } + else + { + size_t to_withdraw= size_t(n_blocks_removed); + n_blocks_to_withdraw= to_withdraw; + first_to_withdraw= &get_nth_page(n_blocks_new)->page; + size_in_bytes_requested= size; + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_unlock(&mutex); + DEBUG_SYNC_C("buf_pool_shrink_before_wakeup"); + mysql_mutex_lock(&flush_list_mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); #ifdef BTR_CUR_HASH_ADAPT - /* enable AHI if needed */ - if (btr_search_disabled) { - btr_search_enable(true); - ib::info() << "Re-enabled adaptive hash index."; - } + ahi_disabled= btr_search_disable(); #endif /* BTR_CUR_HASH_ADAPT */ + mysql_mutex_lock(&mutex); - if (warning) - buf_resize_status("Resizing buffer pool failed"); - - ut_d(validate()); - - return; -} + time_t last_message= 0; -#ifdef __linux__ -inline void buf_pool_t::garbage_collect() -{ - mysql_mutex_lock(&mutex); - size_t freed= 0; - -#ifdef BTR_CUR_HASH_ADAPT - /* buf_LRU_free_page() will temporarily release and reacquire - buf_pool.mutex for invoking btr_search_drop_page_hash_index(). Thus, - we must protect ourselves with the hazard pointer. 
*/ -rescan: -#else - lru_hp.set(nullptr); -#endif - for (buf_page_t *bpage= UT_LIST_GET_LAST(LRU), *prev; bpage; bpage= prev) - { - prev= UT_LIST_GET_PREV(LRU, bpage); -#ifdef BTR_CUR_HASH_ADAPT - lru_hp.set(prev); -#endif - auto state= bpage->state(); - ut_ad(state >= buf_page_t::FREED); - ut_ad(bpage->in_LRU_list); - - /* We try to free any pages that can be freed without writing out - anything. */ - switch (bpage->oldest_modification()) { - case 0: - try_to_evict: - if (buf_LRU_free_page(bpage, true)) + do + { + time_t now= time(nullptr); + if (now - last_message > 15) { - evicted: - freed++; -#ifdef BTR_CUR_HASH_ADAPT - bpage= prev; - prev= lru_hp.get(); - if (!prev && bpage) - goto rescan; -#endif + if (last_message != 0 && to_withdraw == n_blocks_to_withdraw) + break; + to_withdraw= n_blocks_to_withdraw; + last_message= now; + sql_print_information("InnoDB: Trying to shrink" + " innodb_buffer_pool_size=%zum (%zu pages)" + " from %zum (%zu pages, to withdraw %zu)", + size >> 20, n_blocks_new, + old_size >> 20, n_blocks, to_withdraw); } - continue; - case 1: - break; - default: - if (state >= buf_page_t::UNFIXED) - continue; + shrink_status s{shrink(size)}; + if (s == SHRINK_DONE) + goto resized; + if (s != SHRINK_IN_PROGRESS) + break; } + while (!thd_kill_level(thd)); + + ut_ad(size_in_bytes > size_in_bytes_requested); + n_blocks_to_withdraw= 0; + first_to_withdraw= nullptr; + size_in_bytes_requested= size_in_bytes; - if (state < buf_page_t::READ_FIX && bpage->lock.u_lock_try(true)) + while (buf_page_t *b= UT_LIST_GET_FIRST(withdrawn)) { - ut_ad(!bpage->is_io_fixed()); - lsn_t oldest_modification= bpage->oldest_modification(); - switch (oldest_modification) { - case 1: - mysql_mutex_lock(&flush_list_mutex); - oldest_modification= bpage->oldest_modification(); - if (oldest_modification) - { - ut_ad(oldest_modification == 1); - delete_from_flush_list(bpage); - } - mysql_mutex_unlock(&flush_list_mutex); - /* fall through */ - case 0: - bpage->lock.u_unlock(true); - goto try_to_evict; - default: - if (bpage->state() < buf_page_t::UNFIXED && - oldest_modification <= log_sys.get_flushed_lsn()) - { - release_freed_page(bpage); - goto evicted; - } - else - bpage->lock.u_unlock(true); - } + UT_LIST_REMOVE(withdrawn, b); + UT_LIST_ADD_LAST(free, b); + ut_d(b->in_free_list= true); + ut_ad(b->state() == buf_page_t::NOT_USED); + b->lock.init(); } - } - -#if defined MADV_FREE - /* FIXME: Issue fewer calls for larger contiguous blocks of - memory. For now, we assume that this is acceptable, because this - code should be executed rarely. */ - for (buf_page_t *bpage= UT_LIST_GET_FIRST(free); bpage; - bpage= UT_LIST_GET_NEXT(list, bpage)) - madvise(bpage->frame, srv_page_size, MADV_FREE); -#endif - mysql_mutex_unlock(&mutex); - sql_print_information("InnoDB: Memory pressure event freed %zu pages", - freed); - return; -} -#endif /* __linux__ */ - -/** Thread pool task invoked by innodb_buffer_pool_size changes. 
*/ -static void buf_resize_callback(void *) -{ - DBUG_ENTER("buf_resize_callback"); - ut_ad(srv_shutdown_state < SRV_SHUTDOWN_CLEANUP); - mysql_mutex_lock(&buf_pool.mutex); - const auto size= srv_buf_pool_size; - const bool work= srv_buf_pool_old_size != size; - mysql_mutex_unlock(&buf_pool.mutex); - - if (work) - buf_pool.resize(); - else - { - std::ostringstream sout; - sout << "Size did not change: old size = new size = " << size; - buf_resize_status(sout.str().c_str()); - } - DBUG_VOID_RETURN; -} -/* Ensure that task does not run in parallel, by setting max_concurrency to 1 for the thread group */ -static tpool::task_group single_threaded_group(1); -static tpool::waitable_task buf_resize_task(buf_resize_callback, - nullptr, &single_threaded_group); - -void buf_resize_start() -{ -#if !defined(DBUG_OFF) && defined(__linux__) - DBUG_EXECUTE_IF("trigger_garbage_collection", - { - mem_pressure_obj.trigger_collection(); } - ); -#endif - - srv_thread_pool->submit_task(&buf_resize_task); -} -void buf_resize_shutdown() -{ -#ifdef __linux__ - buf_mem_pressure_shutdown(); -#endif - buf_resize_task.wait(); + ut_d(validate()); } - /** Relocate a ROW_FORMAT=COMPRESSED block in the LRU list and buf_pool.page_hash. The caller must relocate bpage->list. @param bpage ROW_FORMAT=COMPRESSED only block @param dpage destination control block */ -static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) +static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) noexcept { const page_id_t id{bpage->id()}; buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); ut_ad(!bpage->frame); mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(mach_read_from_4(bpage->zip.data + FIL_PAGE_OFFSET) == id.page_no()); ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked()); ut_ad(bpage == buf_pool.page_hash.get(id, chain)); ut_ad(!buf_pool.watch_is_sentinel(*bpage)); @@ -2274,6 +2118,7 @@ ut_ad(state <= buf_page_t::READ_FIX); ut_ad(bpage->lock.is_write_locked()); const auto frame= dpage->frame; + ut_ad(frame == reinterpret_cast<buf_block_t*>(dpage)->frame_address()); dpage->lock.free(); new (dpage) buf_page_t(*bpage); @@ -2345,7 +2190,6 @@ ut_ad(w->access_time == 0); ut_ad(!w->oldest_modification()); ut_ad(!w->zip.data); - ut_ad(!w->in_zip_hash); static_assert(buf_page_t::NOT_USED == 0, "efficiency"); if (ut_d(auto s=) w->state()) { @@ -2625,6 +2469,8 @@ ut_ad(block->zip_size()); ut_a(block->page.id().space() != 0); + ut_ad(mach_read_from_4(frame + FIL_PAGE_OFFSET) + == block->page.id().page_no()); if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) { @@ -2863,7 +2709,6 @@ if (b && !watch_is_sentinel(*b)) { uint32_t state= b->fix() + 1; - ut_ad(!b->in_zip_hash); hash_lock.unlock_shared(); if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) @@ -2893,7 +2738,8 @@ return reinterpret_cast<buf_page_t*>(-1); } - if (UNIV_LIKELY(b->frame != nullptr)); + if (UNIV_LIKELY(b->frame != nullptr)) + ut_ad(b->frame == reinterpret_cast<buf_block_t*>(b)->frame_address()); else if (state < buf_page_t::READ_FIX) goto unzip; else @@ -2959,6 +2805,49 @@ } }
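page_guess(), added just below, is what makes a remembered block pointer safe to reuse: under the shared page-hash latch it re-reads the block's id and state, and only buffer-fixes the block when the state proves the pointer still refers to the wanted page. A simplified single-file analogue of that validate-then-fix pattern follows; the state encoding and types are stand-ins rather than the InnoDB definitions, and a plain shared_mutex replaces the transactional latch guard:

    #include <atomic>
    #include <cstdint>
    #include <shared_mutex>

    // Illustrative state encoding: a fix counter with mode bits, loosely
    // modelled on buf_page_t; the real values differ.
    enum : uint32_t { NOT_USED= 0, FREED= 3, UNFIXED= 4,
                      READ_FIX= 1U << 30, WRITE_FIX= 2U << 30 };

    struct block
    {
      std::atomic<uint64_t> id;     // page identifier
      std::atomic<uint32_t> state;  // fix count plus mode bits
    };

    // Returns the state after buffer-fixing if the guess is still valid,
    // or 0 when the caller must fall back to a page-hash lookup.
    uint32_t page_guess(block *b, std::shared_mutex &latch, uint64_t id)
    {
      std::shared_lock<std::shared_mutex> g{latch};  // no eviction while held
      if (b->id.load(std::memory_order_relaxed) != id)
        return 0;
      const uint32_t s= b->state.load(std::memory_order_relaxed);
      // Reject read-fixed blocks: their contents are not yet decided.
      if ((s >= FREED && s < READ_FIX) || s >= WRITE_FIX)
        return b->state.fetch_add(1) + 1;            // buffer-fix and report
      return 0;
    }

The payoff in the real code is that both buf_page_get_low() and buf_page_optimistic_fix() can share this one routine instead of duplicating the guess-validation logic inline.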
+TRANSACTIONAL_TARGET +uint32_t buf_pool_t::page_guess(buf_block_t *b, page_hash_latch &latch, + const page_id_t id) noexcept +{ + transactional_shared_lock_guard<page_hash_latch> g{latch}; +#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* shrunk() and my_virtual_mem_decommit() could retain the original + contents of the virtual memory range or zero it out immediately or + with a delay. Any zeroing out may lead to a false positive for + b->page.id() == id but never for b->page.state(). At the time of + the shrunk() call, shrink() and buf_LRU_block_free_non_file_page() + should guarantee that b->page.state() is equal to + buf_page_t::NOT_USED (0) for all to-be-freed blocks. */ +#else + /* shrunk() made the memory inaccessible. */ + if (UNIV_UNLIKELY(reinterpret_cast<const char*>(b) >= memory + size_in_bytes)) + return 0; +#endif + const page_id_t block_id{b->page.id()}; +#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* shrunk() may have invoked MEM_UNDEFINED() on this memory to be able + to catch any unintended access elsewhere in our code. */ + MEM_MAKE_DEFINED(&block_id, sizeof block_id); +#endif + + if (id == block_id) + { + uint32_t state= b->page.state(); +#ifndef HAVE_UNACCESSIBLE_AFTER_MEM_DECOMMIT + /* shrunk() may have invoked MEM_UNDEFINED() on this memory to be able + to catch any unintended access elsewhere in our code. */ + MEM_MAKE_DEFINED(&state, sizeof state); +#endif + /* Ignore guesses that point to read-fixed blocks. We can only + avoid a race condition by looking up the block via page_hash. */ + if ((state >= buf_page_t::FREED && state < buf_page_t::READ_FIX) || + state >= buf_page_t::WRITE_FIX) + return b->page.fix(); + ut_ad(b->page.frame); + } + return 0; +} + /** Low level function used to get access to a database page. @param[in] page_id page id @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @@ -3023,22 +2912,9 @@ buf_block_t* block = guess; uint32_t state; - if (block) { - transactional_shared_lock_guard<page_hash_latch> g{hash_lock}; - if (buf_pool.is_uncompressed(block) - && page_id == block->page.id()) { - ut_ad(!block->page.in_zip_hash); - state = block->page.state(); - /* Ignore guesses that point to read-fixed blocks. - We can only avoid a race condition by - looking up the block via buf_pool.page_hash. */ - if ((state >= buf_page_t::FREED - && state < buf_page_t::READ_FIX) - || state >= buf_page_t::WRITE_FIX) { - state = block->page.fix(); - goto got_block; - } - } + if (block + && (state = buf_pool.page_guess(block, hash_lock, page_id))) { + goto got_block; } guess = nullptr; @@ -3108,7 +2984,6 @@ goto loop; got_block: - ut_ad(!block->page.in_zip_hash); state++; got_block_fixed: ut_ad(state > buf_page_t::FREED); @@ -3313,6 +3188,7 @@ btr_search_drop_page_hash_index(block, true); #endif /* BTR_CUR_HASH_ADAPT */ + ut_ad(block->page.frame == block->frame_address()); ut_ad(page_id_t(page_get_space_id(block->page.frame), page_get_page_no(block->page.frame)) == page_id); return block; @@ -3418,21 +3294,19 @@ return block; } -TRANSACTIONAL_TARGET buf_block_t *buf_page_optimistic_fix(buf_block_t *block, page_id_t id) noexcept { buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); - transactional_shared_lock_guard<page_hash_latch> g - {buf_pool.page_hash.lock_get(chain)}; - if (UNIV_UNLIKELY(!buf_pool.is_uncompressed(block) || - id != block->page.id() || !block->page.frame)) - return nullptr; - const auto state= block->page.state(); - if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED || - state >= buf_page_t::READ_FIX)) - return nullptr; - block->page.fix(); - return block; + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + if (uint32_t state= buf_pool.page_guess(block, hash_lock, id)) + { + if (UNIV_LIKELY(state >= buf_page_t::UNFIXED)) + return block; + else + /* Refuse access to pages that are marked as freed in the data file.
*/ + block->page.unfix(); + } + return nullptr; } buf_block_t *buf_page_optimistic_get(buf_block_t *block, @@ -3635,6 +3509,7 @@ { mysql_mutex_unlock(&buf_pool.mutex); buf_block_t *block= reinterpret_cast(bpage); + ut_ad(bpage->frame == block->frame_address()); mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); #ifdef BTR_CUR_HASH_ADAPT drop_hash_entry= block->index; @@ -3670,7 +3545,8 @@ else { mysql_mutex_unlock(&buf_pool.mutex); - ut_ad(bpage->frame); + ut_ad(bpage->frame == + reinterpret_cast(bpage)->frame_address()); #ifdef BTR_CUR_HASH_ADAPT ut_ad(!reinterpret_cast(bpage)->index); #endif @@ -4064,10 +3940,9 @@ if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED) { release_page: - if (node.space->full_crc32() && node.space->crypt_data && - recv_recovery_is_on() && - recv_sys.dblwr.find_encrypted_page(node, id().page_no(), - const_cast(read_frame))) + if (node.space->full_crc32() && recv_recovery_is_on() && + recv_sys.dblwr.find_deferred_page(node, id().page_no(), + const_cast(read_frame))) { /* Recover from doublewrite buffer */ err= DB_SUCCESS; @@ -4127,6 +4002,61 @@ return DB_SUCCESS; } +#ifdef BTR_CUR_HASH_ADAPT +/** Clear the adaptive hash index on all pages in the buffer pool. */ +ATTRIBUTE_COLD void buf_pool_t::clear_hash_index() noexcept +{ + std::set garbage; + + mysql_mutex_lock(&mutex); + ut_ad(!btr_search_enabled); + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + dict_index_t *index= block->index; + assert_block_ahi_valid(block); + + /* We can clear block->index and block->n_pointers when + holding all AHI latches exclusively; see the comments in buf0buf.h */ + + if (!index) + { +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + ut_a(!block->n_pointers); +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + continue; + } + + ut_d(const auto s= block->page.state()); + /* Another thread may have set the state to + REMOVE_HASH in buf_LRU_block_remove_hashed(). + + The state change in buf_pool_t::resize() is not observable + here, because in that case we would have !block->index. + + In the end, the entire adaptive hash index will be removed. */ + ut_ad(s >= buf_page_t::UNFIXED || s == buf_page_t::REMOVE_HASH); +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers= 0; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + if (index->freed()) + garbage.insert(index); + block->index= nullptr; + } + + mysql_mutex_unlock(&mutex); + + for (dict_index_t *index : garbage) + btr_search_lazy_free(index); +} +#endif /* BTR_CUR_HASH_ADAPT */ + #ifdef UNIV_DEBUG /** Check that all blocks are in a replaceable state. 
@return address of a non-free block @@ -4134,10 +4064,44 @@ void buf_pool_t::assert_all_freed() noexcept { mysql_mutex_lock(&mutex); - const chunk_t *chunk= chunks; - for (auto i= n_chunks; i--; chunk++) - if (const buf_block_t* block= chunk->not_freed()) - ib::fatal() << "Page " << block->page.id() << " still fixed or dirty"; + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + if (!block->page.in_file()) + continue; + switch (const lsn_t lsn= block->page.oldest_modification()) { + case 0: + case 1: + break; + + case 2: + ut_ad(fsp_is_system_temporary(block->page.id().space())); + break; + + default: + if (srv_read_only_mode) + { + /* The page cleaner is disabled in read-only mode. No pages + can be dirtied, so all of them must be clean. */ + ut_ad(lsn == recv_sys.lsn || + srv_force_recovery == SRV_FORCE_NO_LOG_REDO); + break; + } + + goto fixed_or_dirty; + } + + if (!block->page.can_relocate()) + fixed_or_dirty: + ib::fatal() << "Page " << block->page.id() << " still fixed or dirty"; + } + mysql_mutex_unlock(&mutex); } #endif /* UNIV_DEBUG */ @@ -4187,40 +4151,35 @@ mysql_mutex_lock(&mutex); - chunk_t* chunk = chunks; - /* Check the uncompressed blocks. */ - for (auto i = n_chunks; i--; chunk++) { - buf_block_t* block = chunk->blocks; - - for (auto j = chunk->size; j--; block++) { - ut_ad(block->page.frame); - switch (const auto f = block->page.state()) { - case buf_page_t::NOT_USED: - n_free++; - break; + for (ulint i = 0; i < n_blocks; i++) { + const buf_block_t* block = get_nth_page(i); + ut_ad(block->page.frame == block->frame_address()); - case buf_page_t::MEMORY: - case buf_page_t::REMOVE_HASH: - /* do nothing */ + switch (const auto f = block->page.state()) { + case buf_page_t::NOT_USED: + ut_ad(!block->page.in_LRU_list); + n_free++; + break; + case buf_page_t::MEMORY: + case buf_page_t::REMOVE_HASH: + /* do nothing */ + break; + default: + if (f >= buf_page_t::READ_FIX + && f < buf_page_t::WRITE_FIX) { + /* A read-fixed block is not + necessarily in the page_hash yet. */ break; - - default: - if (f >= buf_page_t::READ_FIX - && f < buf_page_t::WRITE_FIX) { - /* A read-fixed block is not - necessarily in the page_hash yet. */ - break; - } - ut_ad(f >= buf_page_t::FREED); - const page_id_t id{block->page.id()}; - ut_ad(page_hash.get( - id, - page_hash.cell_get(id.fold())) - == &block->page); - n_lru++; } + ut_ad(f >= buf_page_t::FREED); + const page_id_t id{block->page.id()}; + ut_ad(page_hash.get( + id, + page_hash.cell_get(id.fold())) + == &block->page); + n_lru++; } } @@ -4245,24 +4204,11 @@ ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing); mysql_mutex_unlock(&flush_list_mutex); - - if (n_chunks_new == n_chunks - && n_lru + n_free > curr_size + n_zip) { - - ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free - << ", pool " << curr_size - << " zip " << n_zip << ". Aborting..."; - } - + ut_ad(n_lru + n_free <= n_blocks + n_zip); ut_ad(UT_LIST_GET_LEN(LRU) >= n_lru); - - if (n_chunks_new == n_chunks - && UT_LIST_GET_LEN(free) != n_free) { - - ib::fatal() << "Free list len " - << UT_LIST_GET_LEN(free) - << ", free blocks " << n_free << ". 
Aborting...";
-	}
+	ut_ad(UT_LIST_GET_LEN(free) <= n_free);
+	ut_ad(size_in_bytes != size_in_bytes_requested
+	      || UT_LIST_GET_LEN(free) == n_free);

 	mysql_mutex_unlock(&mutex);

@@ -4277,26 +4223,23 @@
 {
 	index_id_t*	index_ids;
 	ulint*		counts;
-	ulint		size;
 	ulint		i;
-	ulint		j;
 	index_id_t	id;
 	ulint		n_found;
-	chunk_t*	chunk;
 	dict_index_t*	index;

-	size = curr_size;
+	mysql_mutex_lock(&mutex);

 	index_ids = static_cast<index_id_t*>(
-		ut_malloc_nokey(size * sizeof *index_ids));
+		ut_malloc_nokey(n_blocks * sizeof *index_ids));

-	counts = static_cast<ulint*>(ut_malloc_nokey(sizeof(ulint) * size));
+	counts = static_cast<ulint*>(
+		ut_malloc_nokey(sizeof(ulint) * n_blocks));

-	mysql_mutex_lock(&mutex);
 	mysql_mutex_lock(&flush_list_mutex);

 	ib::info()
-		<< "[buffer pool: size=" << curr_size
+		<< "[buffer pool: size=" << n_blocks
 		<< ", database pages=" << UT_LIST_GET_LEN(LRU)
 		<< ", free pages=" << UT_LIST_GET_LEN(free)
 		<< ", modified database pages="
@@ -4316,38 +4259,28 @@

 	n_found = 0;

-	chunk = chunks;
-
-	for (i = n_chunks; i--; chunk++) {
-		buf_block_t*	block		= chunk->blocks;
-		ulint		n_blocks	= chunk->size;
-
-		for (; n_blocks--; block++) {
-			const buf_frame_t* frame = block->page.frame;
-
-			if (fil_page_index_page_check(frame)) {
-
-				id = btr_page_get_index_id(frame);
-
-				/* Look for the id in the index_ids array */
-				j = 0;
-
-				while (j < n_found) {
-
-					if (index_ids[j] == id) {
-						counts[j]++;
-
-						break;
-					}
-					j++;
-				}
-
-				if (j == n_found) {
-					n_found++;
-					index_ids[j] = id;
-					counts[j] = 1;
+	for (size_t i = 0; i < n_blocks; i++) {
+		buf_block_t*	block = get_nth_page(i);
+		const buf_frame_t* frame = block->page.frame;
+		ut_ad(frame == block->frame_address());
+
+		if (fil_page_index_page_check(frame)) {
+
+			id = btr_page_get_index_id(frame);
+
+			/* Look for the id in the index_ids array */
+			for (ulint j = 0; j < n_found; j++) {
+				if (index_ids[j] == id) {
+					counts[j]++;
+					goto found;
 				}
 			}
+
+			index_ids[n_found] = id;
+			counts[n_found] = 1;
+			n_found++;
+found:
+			continue;
 		}
 	}

@@ -4381,138 +4314,78 @@
 {
 	ulint fixed_pages_number= 0;

-	mysql_mutex_lock(&buf_pool.mutex);
+	mysql_mutex_assert_owner(&buf_pool.mutex);

 	for (buf_page_t *b= UT_LIST_GET_FIRST(buf_pool.LRU); b;
 	     b= UT_LIST_GET_NEXT(LRU, b))
 		if (b->state() > buf_page_t::UNFIXED)
 			fixed_pages_number++;

-	mysql_mutex_unlock(&buf_pool.mutex);
-
 	return fixed_pages_number;
 }
 #endif /* UNIV_DEBUG */
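The buf_pool_t::get_info() rewrite that follows folds the old
buf_stats_get_pool_info() into a member function. Its rate fields are plain
counter deltas divided by the time since the previous printout; the 0.001
addend avoids division by zero when two printouts land within the same second.
A minimal sketch of that idiom (names are hypothetical, not the MariaDB API):

#include <ctime>

struct counters { unsigned long long pages_read, pages_written; };

// Per-second rates from cumulative counters: (current - snapshot) / elapsed.
void sample_rates(const counters &cur, const counters &old_snapshot,
                  time_t last_printout, double *read_rate, double *write_rate)
{
  double elapsed = 0.001 + difftime(time(nullptr), last_printout);
  *read_rate = double(cur.pages_read - old_snapshot.pages_read) / elapsed;
  *write_rate = double(cur.pages_written - old_snapshot.pages_written) / elapsed;
}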
-/** Collect buffer pool metadata.
-@param[out]	pool_info	buffer pool metadata */
-void buf_stats_get_pool_info(buf_pool_info_t *pool_info) noexcept
+void buf_pool_t::get_info(buf_pool_info_t *pool_info) noexcept
 {
-	time_t			current_time;
-	double			time_elapsed;
-
-	mysql_mutex_lock(&buf_pool.mutex);
-
-	pool_info->pool_size = buf_pool.curr_size;
-
-	pool_info->lru_len = UT_LIST_GET_LEN(buf_pool.LRU);
-
-	pool_info->old_lru_len = buf_pool.LRU_old_len;
-
-	pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool.free);
-
-	mysql_mutex_lock(&buf_pool.flush_list_mutex);
-	pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool.flush_list);
-
-	pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
-
-	pool_info->n_pend_reads = os_aio_pending_reads_approx();
-
-	pool_info->n_pending_flush_lru = buf_pool.n_flush();
-
-	pool_info->n_pending_flush_list = os_aio_pending_writes();
-	mysql_mutex_unlock(&buf_pool.flush_list_mutex);
-
-	current_time = time(NULL);
-	time_elapsed = 0.001 + difftime(current_time,
-					buf_pool.last_printout_time);
-
-	pool_info->n_pages_made_young = buf_pool.stat.n_pages_made_young;
-
-	pool_info->n_pages_not_made_young =
-		buf_pool.stat.n_pages_not_made_young;
-
-	pool_info->n_pages_read = buf_pool.stat.n_pages_read;
-
-	pool_info->n_pages_created = buf_pool.stat.n_pages_created;
-
-	pool_info->n_pages_written = buf_pool.stat.n_pages_written;
-
-	pool_info->n_page_gets = buf_pool.stat.n_page_gets;
-
-	pool_info->n_ra_pages_read_rnd = buf_pool.stat.n_ra_pages_read_rnd;
-	pool_info->n_ra_pages_read = buf_pool.stat.n_ra_pages_read;
-
-	pool_info->n_ra_pages_evicted = buf_pool.stat.n_ra_pages_evicted;
-
-	pool_info->page_made_young_rate =
-		static_cast<double>(buf_pool.stat.n_pages_made_young
-				    - buf_pool.old_stat.n_pages_made_young)
-		/ time_elapsed;
-
-	pool_info->page_not_made_young_rate =
-		static_cast<double>(buf_pool.stat.n_pages_not_made_young
-				    - buf_pool.old_stat.n_pages_not_made_young)
-		/ time_elapsed;
-
-	pool_info->pages_read_rate =
-		static_cast<double>(buf_pool.stat.n_pages_read
-				    - buf_pool.old_stat.n_pages_read)
-		/ time_elapsed;
-
-	pool_info->pages_created_rate =
-		static_cast<double>(buf_pool.stat.n_pages_created
-				    - buf_pool.old_stat.n_pages_created)
-		/ time_elapsed;
-
-	pool_info->pages_written_rate =
-		static_cast<double>(buf_pool.stat.n_pages_written
-				    - buf_pool.old_stat.n_pages_written)
-		/ time_elapsed;
-
-	pool_info->n_page_get_delta = buf_pool.stat.n_page_gets
-				      - buf_pool.old_stat.n_page_gets;
-
-	if (pool_info->n_page_get_delta) {
-		pool_info->page_read_delta = buf_pool.stat.n_pages_read
-					     - buf_pool.old_stat.n_pages_read;
-
-		pool_info->young_making_delta =
-			buf_pool.stat.n_pages_made_young
-			- buf_pool.old_stat.n_pages_made_young;
-
-		pool_info->not_young_making_delta =
-			buf_pool.stat.n_pages_not_made_young
-			- buf_pool.old_stat.n_pages_not_made_young;
-	}
-	pool_info->pages_readahead_rnd_rate =
-		static_cast<double>(buf_pool.stat.n_ra_pages_read_rnd
-				    - buf_pool.old_stat.n_ra_pages_read_rnd)
-		/ time_elapsed;
-
-
-	pool_info->pages_readahead_rate =
-		static_cast<double>(buf_pool.stat.n_ra_pages_read
-				    - buf_pool.old_stat.n_ra_pages_read)
-		/ time_elapsed;
-
-	pool_info->pages_evicted_rate =
-		static_cast<double>(buf_pool.stat.n_ra_pages_evicted
-				    - buf_pool.old_stat.n_ra_pages_evicted)
-		/ time_elapsed;
-
-	pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
-
-	pool_info->io_sum = buf_LRU_stat_sum.io;
-
-	pool_info->io_cur = buf_LRU_stat_cur.io;
+	mysql_mutex_lock(&mutex);
+	pool_info->pool_size= curr_size();
+	pool_info->lru_len= UT_LIST_GET_LEN(LRU);
+	pool_info->old_lru_len= LRU_old_len;
+	pool_info->free_list_len=
UT_LIST_GET_LEN(free); - pool_info->unzip_sum = buf_LRU_stat_sum.unzip; + mysql_mutex_lock(&flush_list_mutex); + pool_info->flush_list_len= UT_LIST_GET_LEN(flush_list); + pool_info->n_pend_unzip= UT_LIST_GET_LEN(unzip_LRU); + pool_info->n_pend_reads= os_aio_pending_reads_approx(); + pool_info->n_pending_flush_lru= n_flush(); + pool_info->n_pending_flush_list= os_aio_pending_writes(); + mysql_mutex_unlock(&flush_list_mutex); - pool_info->unzip_cur = buf_LRU_stat_cur.unzip; + double elapsed= 0.001 + difftime(time(nullptr), last_printout_time); - buf_refresh_io_stats(); - mysql_mutex_unlock(&buf_pool.mutex); + pool_info->n_pages_made_young= stat.n_pages_made_young; + pool_info->page_made_young_rate= + double(stat.n_pages_made_young - old_stat.n_pages_made_young) / + elapsed; + pool_info->n_pages_not_made_young= stat.n_pages_not_made_young; + pool_info->page_not_made_young_rate= + double(stat.n_pages_not_made_young - old_stat.n_pages_not_made_young) / + elapsed; + pool_info->n_pages_read= stat.n_pages_read; + pool_info->pages_read_rate= + double(stat.n_pages_read - old_stat.n_pages_read) / elapsed; + pool_info->n_pages_created= stat.n_pages_created; + pool_info->pages_created_rate= + double(stat.n_pages_created - old_stat.n_pages_created) / elapsed; + pool_info->n_pages_written= stat.n_pages_written; + pool_info->pages_written_rate= + double(stat.n_pages_written - old_stat.n_pages_written) / elapsed; + pool_info->n_page_gets= stat.n_page_gets; + pool_info->n_page_get_delta= stat.n_page_gets - old_stat.n_page_gets; + if (pool_info->n_page_get_delta) + { + pool_info->page_read_delta= stat.n_pages_read - old_stat.n_pages_read; + pool_info->young_making_delta= + stat.n_pages_made_young - old_stat.n_pages_made_young; + pool_info->not_young_making_delta= + stat.n_pages_not_made_young - old_stat.n_pages_not_made_young; + } + pool_info->n_ra_pages_read_rnd= stat.n_ra_pages_read_rnd; + pool_info->pages_readahead_rnd_rate= + double(stat.n_ra_pages_read_rnd - old_stat.n_ra_pages_read_rnd) / elapsed; + pool_info->n_ra_pages_read= stat.n_ra_pages_read; + pool_info->pages_readahead_rate= + double(stat.n_ra_pages_read - old_stat.n_ra_pages_read) / elapsed; + pool_info->n_ra_pages_evicted= stat.n_ra_pages_evicted; + pool_info->pages_evicted_rate= + double(stat.n_ra_pages_evicted - old_stat.n_ra_pages_evicted) / elapsed; + pool_info->unzip_lru_len= UT_LIST_GET_LEN(unzip_LRU); + pool_info->io_sum= buf_LRU_stat_sum.io; + pool_info->io_cur= buf_LRU_stat_cur.io; + pool_info->unzip_sum= buf_LRU_stat_sum.unzip; + pool_info->unzip_cur= buf_LRU_stat_cur.unzip; + buf_refresh_io_stats(); + mysql_mutex_unlock(&mutex); } /*********************************************************************//** @@ -4620,7 +4493,7 @@ { buf_pool_info_t pool_info; - buf_stats_get_pool_info(&pool_info); + buf_pool.get_info(&pool_info); buf_print_io_instance(&pool_info, file); } diff -Nru mariadb-10.11.11/storage/innobase/buf/buf0dblwr.cc mariadb-10.11.13/storage/innobase/buf/buf0dblwr.cc --- mariadb-10.11.11/storage/innobase/buf/buf0dblwr.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/buf/buf0dblwr.cc 2025-05-19 16:14:25.000000000 +0000 @@ -365,7 +365,7 @@ ut_ad(log_sys.last_checkpoint_lsn); if (!is_created()) return; - const lsn_t max_lsn{log_sys.get_lsn()}; + const lsn_t max_lsn{log_sys.get_flushed_lsn(std::memory_order_relaxed)}; ut_ad(recv_sys.scanned_lsn == max_lsn); ut_ad(recv_sys.scanned_lsn >= recv_sys.lsn); @@ -374,7 +374,7 @@ srv_page_size)); byte *const buf= read_buf + srv_page_size; - std::deque 
<byte*> encrypted_pages;
+  std::deque<byte*> deferred_pages;

   for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
        i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
   {
@@ -393,11 +393,12 @@
     {
       /* These pages does not appear to belong to any tablespace.
       There is a possibility that this page could be
-      encrypted using full_crc32 format. If innodb encounters
-      any corrupted encrypted page during recovery then
-      InnoDB should use this page to find the valid page.
-      See find_encrypted_page() */
-      encrypted_pages.push_back(*i);
+      encrypted/compressed using full_crc32 format.
+      If innodb encounters any corrupted encrypted/compressed
+      page during recovery then InnoDB should use this page to
+      find the valid page.
+      See find_encrypted_page()/find_page_compressed() */
+      deferred_pages.push_back(*i);
       continue;
     }
@@ -478,7 +479,7 @@
   }

   recv_sys.dblwr.pages.clear();
-  for (byte *page : encrypted_pages)
+  for (byte *page : deferred_pages)
     recv_sys.dblwr.pages.push_back(page);
   fil_flush_file_spaces();
   aligned_free(read_buf);
@@ -599,20 +600,67 @@
 }
 #endif /* UNIV_DEBUG */

+ATTRIBUTE_COLD void buf_dblwr_t::print_info() const noexcept
+{
+  mysql_mutex_assert_owner(&mutex);
+  const slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
+
+  sql_print_information("InnoDB: Double Write State\n"
+                        "-------------------\n"
+                        "Batch running : %s\n"
+                        "Active Slot - first_free: %zu reserved: %zu\n"
+                        "Flush Slot  - first_free: %zu reserved: %zu\n"
+                        "-------------------",
+                        (batch_running ? "true" : "false"),
+                        active_slot->first_free, active_slot->reserved,
+                        flush_slot->first_free, flush_slot->reserved);
+}
+
 bool buf_dblwr_t::flush_buffered_writes(const ulint size) noexcept
 {
   mysql_mutex_assert_owner(&mutex);
   ut_ad(size == block_size());

-  for (;;)
+  const size_t max_count= 60 * 60;
+  const size_t first_log_count= 30;
+  const size_t fatal_threshold=
+    static_cast<size_t>(srv_fatal_semaphore_wait_threshold);
+  size_t log_count= first_log_count;
+
+  for (size_t count= 0;;)
   {
     if (!active_slot->first_free)
       return false;
     if (!batch_running)
       break;
-    my_cond_wait(&cond, &mutex.m_mutex);
-  }
+
+    timespec abstime;
+    set_timespec(abstime, 1);
+    my_cond_timedwait(&cond, &mutex.m_mutex, &abstime);
+
+    if (count > fatal_threshold)
+    {
+      buf_pool.print_flush_info();
+      print_info();
+      ib::fatal() << "InnoDB: Long wait (" << count
+                  << " seconds) for double-write buffer flush.";
+    }
+    else if (++count < first_log_count && !(count % 5))
+    {
+      sql_print_information("InnoDB: Long wait (%zu seconds) for double-write"
+                            " buffer flush.", count);
+      buf_pool.print_flush_info();
+      print_info();
+    }
+    else if (!(count % log_count))
+    {
+      sql_print_warning("InnoDB: Long wait (%zu seconds) for double-write"
+                        " buffer flush.", count);
+      buf_pool.print_flush_info();
+      print_info();
+      log_count= log_count >= max_count ? max_count : log_count * 2;
+    }
+  }

   ut_ad(active_slot->reserved == active_slot->first_free);
   ut_ad(!flushing_buffered_writes);
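The rewritten wait loop above replaces an unbounded my_cond_wait() with
one-second my_cond_timedwait() slices so that progress can be reported:
informational messages every 5 seconds for the first 30 seconds, then warnings
at intervals that double up to a one-hour cap, and finally ib::fatal() once
srv_fatal_semaphore_wait_threshold is exceeded. The backoff schedule in
isolation (a sketch, not the MariaDB code):

#include <cstdio>
#include <cstddef>

int main()
{
  const size_t max_count = 60 * 60, first_log_count = 30;
  size_t log_count = first_log_count;     // current warning interval, seconds

  for (size_t count = 1; count <= 4 * max_count; count++)
    if (count < first_log_count ? !(count % 5) : !(count % log_count))
    {
      std::printf("still waiting after %zu seconds\n", count);
      if (count >= first_log_count)       // double the interval, capped
        log_count = log_count >= max_count ? max_count : log_count * 2;
    }
  // prints at 5, 10, 15, 20, 25, then 30, 60, 120, 240, ... eventually hourly
}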
@@ -732,6 +780,9 @@
     ut_ad(lsn);
     ut_ad(lsn >= bpage->oldest_modification());
     log_write_up_to(lsn, true);
+    ut_ad(!e.request.node->space->full_crc32() ||
+          !buf_page_is_corrupted(true, static_cast<const byte*>(frame),
+                                 e.request.node->space->flags));
     e.request.node->space->io(e.request, bpage->physical_offset(), e_size,
                               frame, bpage);
   }
diff -Nru mariadb-10.11.11/storage/innobase/buf/buf0dump.cc mariadb-10.11.13/storage/innobase/buf/buf0dump.cc
--- mariadb-10.11.11/storage/innobase/buf/buf0dump.cc	2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/storage/innobase/buf/buf0dump.cc	2025-05-19 16:14:25.000000000 +0000
@@ -58,7 +58,7 @@
 static volatile bool	buf_dump_should_start;
 static volatile bool	buf_load_should_start;

-static bool	buf_load_abort_flag;
+static Atomic_relaxed<bool>	buf_load_abort_flag;

 /** Start the buffer pool dump/load task and instructs it to start a dump. */
 void buf_dump_start()
@@ -295,7 +295,7 @@
 	/* limit the number of total pages dumped to X% of the
 	total number of pages */
-	t_pages = buf_pool.curr_size * srv_buf_pool_dump_pct / 100;
+	t_pages = buf_pool.curr_size() * srv_buf_pool_dump_pct / 100;

 	if (n_pages > t_pages) {
 		buf_dump_status(STATUS_INFO,
 				"Restricted to " ULINTPF
@@ -477,10 +477,10 @@
 		return;
 	}

-	/* If dump is larger than the buffer pool(s), then we ignore the
+	/* If the dump is larger than the buffer pool, then we ignore the
 	extra trailing. This could happen if a dump is made, then buffer
 	pool is shrunk and then load is attempted. */
-	dump_n = std::min(dump_n, buf_pool.get_n_pages());
+	dump_n = std::min(dump_n, buf_pool.curr_size());

 	if (dump_n != 0) {
 		dump = static_cast<buf_dump_t*>(ut_malloc_nokey(
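The buf0dump.cc hunks above complete the switch from the removed
buf_pool.curr_size field to the curr_size() accessor. The surrounding logic is
unchanged: a dump is clamped to srv_buf_pool_dump_pct percent of the pool, and
a load ignores trailing entries that no longer fit after the pool was shrunk.
A worked sketch with made-up numbers:

#include <algorithm>
#include <cstdio>

int main()
{
  const size_t curr_size = 393216;   // pool of 6 GiB with 16 KiB pages
  const unsigned dump_pct = 25;      // like srv_buf_pool_dump_pct
  size_t n_pages = 400000;           // page ids currently on the LRU list

  const size_t t_pages = curr_size * dump_pct / 100;
  n_pages = std::min(n_pages, t_pages);  // restrict the dump to 25% of the pool
  std::printf("dumping %zu page ids\n", n_pages);
}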
diff -Nru mariadb-10.11.11/storage/innobase/buf/buf0flu.cc mariadb-10.11.13/storage/innobase/buf/buf0flu.cc
--- mariadb-10.11.11/storage/innobase/buf/buf0flu.cc	2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/storage/innobase/buf/buf0flu.cc	2025-05-19 16:14:25.000000000 +0000
@@ -281,6 +281,8 @@
 {
   ut_ad(!persistent == fsp_is_system_temporary(id().space()));
   ut_ad(state >= WRITE_FIX);
+  ut_ad(!frame ||
+        frame == reinterpret_cast<const buf_block_t*>(this)->frame_address());

   if (UNIV_LIKELY(!error))
   {
@@ -692,7 +694,6 @@
   {
     static_assert(FIL_PAGE_FCRC32_CHECKSUM == 4, "alignment");
     mach_write_to_4(tmp + len - 4, my_crc32c(0, tmp, len - 4));
-    ut_ad(!buf_page_is_corrupted(true, tmp, space->flags));
   }

   d= tmp;
@@ -795,6 +796,7 @@
   size_t orig_size;
 #endif
   buf_tmp_buffer_t *slot= nullptr;
+  byte *page= frame;

   if (UNIV_UNLIKELY(!frame)) /* ROW_FORMAT=COMPRESSED */
   {
@@ -810,7 +812,6 @@
   }
   else
   {
-    byte *page= frame;
     size= block->physical_size();
 #if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
     orig_size= size;
@@ -852,6 +853,8 @@
     if (!space->is_temporary() && !space->is_being_imported() &&
         lsn > log_sys.get_flushed_lsn())
       log_write_up_to(lsn, true);
+    ut_ad(space->is_temporary() || !space->full_crc32() ||
+          !buf_page_is_corrupted(true, write_frame, space->flags));
     space->io(IORequest{type, this, slot}, physical_offset(), size,
               write_frame, this);
   }
@@ -891,7 +894,7 @@
                     : space.physical_size() == 1024 ? 3 : 0));
   /* When flushed, dirty blocks are searched in neighborhoods of this size,
   and flushed along with the original page. */
-  const ulint s= buf_pool.curr_size / 16;
+  const ulint s= buf_pool.curr_size() / 16;
   const uint32_t read_ahead= buf_pool.read_ahead_area;
   const uint32_t buf_flush_area= read_ahead > s
     ? static_cast<uint32_t>(s) : read_ahead;
@@ -1209,18 +1212,34 @@
   buf_LRU_free_page(bpage, true);
 }

+/** Adjust to_withdraw during buf_pool_t::shrink() */
+ATTRIBUTE_COLD static size_t buf_flush_LRU_to_withdraw(size_t to_withdraw,
+                                                       const buf_page_t &bpage)
+  noexcept
+{
+  mysql_mutex_assert_owner(&buf_pool.mutex);
+  if (!buf_pool.is_shrinking())
+    return 0;
+  const size_t size{buf_pool.size_in_bytes_requested};
+  if (buf_pool.will_be_withdrawn(bpage.frame, size) ||
+      buf_pool.will_be_withdrawn(bpage.zip.data, size))
+    to_withdraw--;
+  return to_withdraw;
+}
+
 /** Flush dirty blocks from the end buf_pool.LRU,
 and move clean blocks to buf_pool.free.
-@param max    maximum number of blocks to flush
-@param n      counts of flushed and evicted pages */
-static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept
+@param max          maximum number of blocks to flush
+@param n            counts of flushed and evicted pages
+@param to_withdraw  buf_pool.to_withdraw() */
+static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n,
+                                     size_t to_withdraw) noexcept
 {
-  ulint scanned= 0;
+  size_t scanned= 0;
   mysql_mutex_assert_owner(&buf_pool.mutex);
-  ulint free_limit{buf_pool.LRU_scan_depth};
-  if (buf_pool.withdraw_target && buf_pool.is_shrinking())
-    free_limit+= buf_pool.withdraw_target - UT_LIST_GET_LEN(buf_pool.withdraw);
-
+  size_t free_limit{buf_pool.LRU_scan_depth};
+  if (UNIV_UNLIKELY(to_withdraw > free_limit))
+    to_withdraw= free_limit;
   const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN
     ? 0 : buf_pool.flush_neighbors;
   fil_space_t *space= nullptr;
@@ -1230,20 +1249,21 @@
   /* BUF_LRU_MIN_LEN (256) is too high value for low buffer pool(BP) size.
   For example, for BP size lower than 80M and 16 K page size, the limit is more than
-  5% of total BP and for lowest BP 5M, it is 80% of the BP. Non-data objects
+  5% of total BP and for lowest BP 6M, it is 80% of the BP. Non-data objects
   like explicit locks could occupy part of the BP pool reducing the pages
   available for LRU. If LRU reaches minimum limit and if no free pages are
   available, server would hang with page cleaner not able to free any more
   pages. To avoid such hang, we adjust the LRU limit lower than the limit for
   data objects as checked in buf_LRU_check_size_of_non_data_objects() i.e.
   one page less than 5% of BP. */
-  size_t pool_limit= buf_pool.curr_size / 20 - 1;
-  auto buf_lru_min_len= std::min(pool_limit, BUF_LRU_MIN_LEN);
+  const size_t buf_lru_min_len=
+    std::min((buf_pool.usable_size()) / 20 - 1, size_t{BUF_LRU_MIN_LEN});

   for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU);
        bpage &&
        ((UT_LIST_GET_LEN(buf_pool.LRU) > buf_lru_min_len &&
          UT_LIST_GET_LEN(buf_pool.free) < free_limit) ||
+        to_withdraw ||
         recv_recovery_is_on());
       ++scanned, bpage= buf_pool.lru_hp.get())
   {
@@ -1259,6 +1279,8 @@
     if (state != buf_page_t::FREED &&
         (state >= buf_page_t::READ_FIX || (~buf_page_t::LRU_MASK & state)))
       continue;
+    if (UNIV_UNLIKELY(to_withdraw != 0))
+      to_withdraw= buf_flush_LRU_to_withdraw(to_withdraw, *bpage);
     buf_LRU_free_page(bpage, true);
     ++n->evicted;
     if (UNIV_LIKELY(scanned & 31))
@@ -1330,20 +1352,32 @@
       continue;
     }

+    if (state < buf_page_t::UNFIXED)
+      goto flush;
+
     if (n->flushed >= max && !recv_recovery_is_on())
     {
       bpage->lock.u_unlock(true);
       break;
     }

-    if (neighbors && space->is_rotational())
+    if (neighbors && space->is_rotational() && UNIV_LIKELY(!to_withdraw) &&
+        /* Skip neighbourhood flush from LRU list if we haven't yet reached
+        half of the free page target.
*/ + UT_LIST_GET_LEN(buf_pool.free) * 2 >= free_limit) n->flushed+= buf_flush_try_neighbors(space, page_id, bpage, neighbors == 1, n->flushed, max); - else if (bpage->flush(space)) - ++n->flushed; else - continue; + { + flush: + if (UNIV_UNLIKELY(to_withdraw != 0)) + to_withdraw= buf_flush_LRU_to_withdraw(to_withdraw, *bpage); + if (bpage->flush(space)) + ++n->flushed; + else + continue; + } goto reacquire_mutex; } @@ -1372,11 +1406,12 @@ @param n counts of flushed and evicted pages */ static void buf_do_LRU_batch(ulint max, flush_counters_t *n) noexcept { - if (buf_LRU_evict_from_unzip_LRU()) + const size_t to_withdraw= buf_pool.to_withdraw(); + if (!to_withdraw && buf_LRU_evict_from_unzip_LRU()) buf_free_from_unzip_LRU_list_batch(); n->evicted= 0; n->flushed= 0; - buf_flush_LRU_list_batch(max, n); + buf_flush_LRU_list_batch(max, n, to_withdraw); mysql_mutex_assert_owner(&buf_pool.mutex); buf_lru_freed_page_count+= n->evicted; @@ -1725,14 +1760,22 @@ buf_do_LRU_batch(max_n, &n); ulint pages= n.flushed; + ulint evicted= n.evicted; + + /* If we have exhausted flush quota, it is likely we exited before + generating enough free pages. Call once more with 0 flush to generate + free pages immediately as required. */ + if (pages >= max_n) + buf_do_LRU_batch(0, &n); - if (n.evicted) + evicted+= n.evicted; + if (evicted) { buf_pool.try_LRU_scan= true; pthread_cond_broadcast(&buf_pool.done_free); } else if (!pages && !buf_pool.try_LRU_scan) - /* For example, with the minimum innodb_buffer_pool_size=5M and + /* For example, with the minimum innodb_buffer_pool_size=6M and the default innodb_page_size=16k there are only a little over 316 pages in the buffer pool. The buffer pool can easily be exhausted by a workload of some dozen concurrent connections. The system could @@ -1760,8 +1803,9 @@ { ut_ad(!srv_read_only_mode); ut_ad(end_lsn >= next_checkpoint_lsn); - ut_ad(end_lsn <= get_lsn()); - ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT <= get_lsn() || + ut_d(const lsn_t current_lsn{get_lsn()}); + ut_ad(end_lsn <= current_lsn); + ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT <= current_lsn || srv_shutdown_state > SRV_SHUTDOWN_INITIATED); DBUG_PRINT("ib_log", @@ -1890,7 +1934,8 @@ ut_ad(!is_opened()); my_munmap(buf, file_size); buf= resize_buf; - set_buf_free(START_OFFSET + (get_lsn() - resizing)); + buf_size= unsigned(std::min(resize_target - START_OFFSET, + buf_size_max)); } else #endif @@ -1912,7 +1957,8 @@ resize_flush_buf= nullptr; resize_target= 0; resize_lsn.store(0, std::memory_order_relaxed); - writer_update(); + resize_initiator= nullptr; + writer_update(false); } log_resize_release(); @@ -1999,6 +2045,14 @@ if (recv_recovery_is_on()) recv_sys.apply(true); +#if defined HAVE_valgrind && !__has_feature(memory_sanitizer) + /* The built-in scheduler in Valgrind may neglect some threads for a + long time. Under Valgrind, let us explicitly wait for page write + completion in order to avoid a result difference in the test + innodb.page_cleaner. */ + os_aio_wait_until_no_pending_writes(false); +#endif + switch (srv_file_flush_method) { case SRV_NOSYNC: case SRV_O_DIRECT_NO_FSYNC: @@ -2016,9 +2070,9 @@ } /** Make a checkpoint. */ -ATTRIBUTE_COLD void log_make_checkpoint() +ATTRIBUTE_COLD void log_make_checkpoint() noexcept { - buf_flush_wait_flushed(log_sys.get_lsn(std::memory_order_acquire)); + buf_flush_wait_flushed(log_get_lsn()); while (!log_checkpoint()); } @@ -2026,8 +2080,6 @@ NOTE: The calling thread is not allowed to hold any buffer page latches! 
*/ static void buf_flush_wait(lsn_t lsn) noexcept { - ut_ad(lsn <= log_sys.get_lsn()); - lsn_t oldest_lsn; while ((oldest_lsn= buf_pool.get_oldest_modification(lsn)) < lsn) @@ -2192,6 +2244,8 @@ MONITOR_FLUSH_SYNC_PAGES, n_flushed); } + os_aio_wait_until_no_pending_writes(false); + switch (srv_file_flush_method) { case SRV_NOSYNC: case SRV_O_DIRECT_NO_FSYNC: @@ -2234,13 +2288,13 @@ mysql_mutex_unlock(&buf_pool.flush_list_mutex); } -/** Check if the adpative flushing threshold is recommended based on +/** Check if the adaptive flushing threshold is recommended based on redo log capacity filled threshold. @param oldest_lsn buf_pool.get_oldest_modification() @return true if adaptive flushing is recommended. */ static bool af_needed_for_redo(lsn_t oldest_lsn) noexcept { - lsn_t age= (log_sys.get_lsn() - oldest_lsn); + lsn_t age= log_sys.get_lsn_approx() - oldest_lsn; lsn_t af_lwm= static_cast(srv_adaptive_flushing_lwm * static_cast(log_sys.log_capacity) / 100); @@ -2300,7 +2354,7 @@ lsn_t lsn_rate; ulint n_pages = 0; - const lsn_t cur_lsn = log_sys.get_lsn(); + const lsn_t cur_lsn = log_sys.get_lsn_approx(); ut_ad(oldest_lsn <= cur_lsn); ulint pct_for_lsn = af_get_pct_for_lsn(cur_lsn - oldest_lsn); time_t curr_time = time(nullptr); @@ -2309,13 +2363,23 @@ if (!prev_lsn || !pct_for_lsn) { prev_time = curr_time; prev_lsn = cur_lsn; - if (max_pct > 0.0) { - dirty_pct /= max_pct; - } - n_pages = ulint(dirty_pct * double(srv_io_capacity)); - if (n_pages < dirty_blocks) { - n_pages= std::min(srv_io_capacity, dirty_blocks); + if (srv_io_capacity >= dirty_blocks) { + n_pages = dirty_blocks; + } else { + if (max_pct > 1.0) { + dirty_pct/= max_pct; + } + n_pages= ulint(dirty_pct * double(srv_io_capacity)); + + if (n_pages < dirty_blocks) { + n_pages= srv_io_capacity; + + } else { + /* Set maximum IO capacity upper bound. */ + n_pages= std::min(srv_max_io_capacity, + dirty_blocks); + } } func_exit: @@ -2412,6 +2476,13 @@ } TPOOL_SUPPRESS_TSAN +bool buf_pool_t::running_out() const noexcept +{ + return !recv_recovery_is_on() && + UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < n_blocks / 4; +} + +TPOOL_SUPPRESS_TSAN bool buf_pool_t::need_LRU_eviction() const noexcept { /* try_LRU_scan==false means that buf_LRU_get_free_block() is waiting @@ -2448,6 +2519,11 @@ DBUG_EXECUTE_IF("ib_page_cleaner_sleep", { std::this_thread::sleep_for(std::chrono::seconds(1)); + /* Cover the logging code in debug mode. */ + buf_pool.print_flush_info(); + buf_dblwr.lock(); + buf_dblwr.print_info(); + buf_dblwr.unlock(); }); lsn_limit= buf_flush_sync_lsn; @@ -2470,7 +2546,7 @@ (!UT_LIST_GET_LEN(buf_pool.flush_list) || srv_max_dirty_pages_pct_lwm == 0.0)) { - buf_pool.LRU_warned.clear(std::memory_order_release); + buf_pool.LRU_warned_clear(); /* We are idle; wait for buf_pool.page_cleaner_wakeup() */ my_cond_wait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex); @@ -2545,6 +2621,7 @@ buf_pool.n_flush_inc(); mysql_mutex_unlock(&buf_pool.flush_list_mutex); n= srv_max_io_capacity; + os_aio_wait_until_no_pending_writes(false); mysql_mutex_lock(&buf_pool.mutex); LRU_flush: n= buf_flush_LRU(n); @@ -2648,10 +2725,17 @@ !buf_pool.need_LRU_eviction()) goto check_oldest_and_set_idle; else + { mysql_mutex_lock(&buf_pool.mutex); + os_aio_wait_until_no_pending_writes(false); + } n= srv_max_io_capacity; n= n >= n_flushed ? n - n_flushed : 0; + /* It is critical to generate free pages to keep the system alive. Make + sure we are not hindered by dirty pages in LRU tail. 
*/ + n= std::max(n, std::min(srv_max_io_capacity, + buf_pool.LRU_scan_depth)); goto LRU_flush; } @@ -2689,11 +2773,13 @@ { mysql_mutex_assert_owner(&mutex); try_LRU_scan= false; - if (!LRU_warned.test_and_set(std::memory_order_acquire)) + if (!LRU_warned) + { + LRU_warned= true; sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" - " %zu blocks are in use and %zu free." - " Consider increasing innodb_buffer_pool_size.", - UT_LIST_GET_LEN(LRU), UT_LIST_GET_LEN(free)); + " Consider increasing innodb_buffer_pool_size."); + print_flush_info(); + } } /** Initialize page_cleaner. */ @@ -2740,7 +2826,7 @@ NOTE: The calling thread is not allowed to hold any buffer page latches! */ void buf_flush_sync_batch(lsn_t lsn) noexcept { - lsn= std::max(lsn, log_sys.get_lsn()); + lsn= std::max(lsn, log_get_lsn()); mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_flush_wait(lsn); mysql_mutex_unlock(&buf_pool.flush_list_mutex); @@ -2759,24 +2845,77 @@ thd_wait_begin(nullptr, THD_WAIT_DISKIO); tpool::tpool_wait_begin(); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - for (;;) + log_sys.latch.wr_lock(SRW_LOCK_CALL); + + for (lsn_t lsn= log_sys.get_lsn();;) { - const lsn_t lsn= log_sys.get_lsn(); + log_sys.latch.wr_unlock(); + mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_flush_wait(lsn); /* Wait for the page cleaner to be idle (for log resizing at startup) */ while (buf_flush_sync_lsn) my_cond_wait(&buf_pool.done_flush_list, &buf_pool.flush_list_mutex.m_mutex); - if (lsn == log_sys.get_lsn()) + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + log_sys.latch.wr_lock(SRW_LOCK_CALL); + lsn_t new_lsn= log_sys.get_lsn(); + if (lsn == new_lsn) break; + lsn= new_lsn; } - mysql_mutex_unlock(&buf_pool.flush_list_mutex); + log_sys.latch.wr_unlock(); tpool::tpool_wait_end(); thd_wait_end(nullptr); } +ATTRIBUTE_COLD void buf_pool_t::print_flush_info() const noexcept +{ + /* We do dirty read of UT_LIST count variable. */ + size_t lru_size= UT_LIST_GET_LEN(LRU); + size_t dirty_size= UT_LIST_GET_LEN(flush_list); + size_t free_size= UT_LIST_GET_LEN(free); + size_t dirty_pct= lru_size ? dirty_size * 100 / (lru_size + free_size) : 0; + sql_print_information("InnoDB: Buffer Pool pages\n" + "-------------------\n" + "LRU Pages : %zu\n" + "Free Pages : %zu\n" + "Dirty Pages: %zu : %zu%%\n" + "-------------------", + lru_size, free_size, dirty_size, dirty_pct); + + lsn_t lsn= log_get_lsn(); + lsn_t clsn= log_sys.last_checkpoint_lsn; + sql_print_information("InnoDB: LSN flush parameters\n" + "-------------------\n" + "System LSN : %" PRIu64 "\n" + "Checkpoint LSN: %" PRIu64 "\n" + "Flush ASync LSN: %" PRIu64 "\n" + "Flush Sync LSN: %" PRIu64 "\n" + "-------------------", + lsn, clsn, buf_flush_async_lsn.load(), buf_flush_sync_lsn.load()); + + lsn_t age= lsn - clsn; + lsn_t age_pct= log_sys.max_checkpoint_age + ? age * 100 / log_sys.max_checkpoint_age : 0; + sql_print_information("InnoDB: LSN age parameters\n" + "-------------------\n" + "Current Age : %" PRIu64 " : %" PRIu64 "%%\n" + "Max Age(Async): %" PRIu64 "\n" + "Max Age(Sync) : %" PRIu64 "\n" + "Capacity : %" PRIu64 "\n" + "-------------------", + age, age_pct, log_sys.max_modified_age_async, log_sys.max_checkpoint_age, + log_sys.log_capacity); + + sql_print_information("InnoDB: Pending IO count\n" + "-------------------\n" + "Pending Read : %zu\n" + "Pending Write: %zu\n" + "-------------------", + os_aio_pending_reads_approx(), os_aio_pending_writes_approx()); +} + #ifdef UNIV_DEBUG /** Functor to validate the flush list. 
*/
 struct Check {
diff -Nru mariadb-10.11.11/storage/innobase/buf/buf0lru.cc mariadb-10.11.13/storage/innobase/buf/buf0lru.cc
--- mariadb-10.11.11/storage/innobase/buf/buf0lru.cc	2025-01-30 11:01:24.000000000 +0000
+++ mariadb-10.11.13/storage/innobase/buf/buf0lru.cc	2025-05-19 16:14:25.000000000 +0000
@@ -38,6 +38,7 @@
 #include "srv0srv.h"
 #include "srv0mon.h"
 #include "my_cpu.h"
+#include "log.h"

 /** The number of blocks from the LRU_old pointer onward, including
 the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
@@ -133,7 +134,7 @@

 	buf_pool.stat.LRU_bytes += bpage->physical_size();

-	ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size);
+	ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size());
 }

 /** @return whether the unzip_LRU list should be used for evicting a victim
@@ -259,89 +260,55 @@
 	return(freed);
 }

-/** @return a buffer block from the buf_pool.free list
-@retval	NULL	if the free list is empty */
-buf_block_t* buf_LRU_get_free_only()
-{
-	buf_block_t*	block;
-
-	mysql_mutex_assert_owner(&buf_pool.mutex);
-
-	block = reinterpret_cast<buf_block_t*>(
-		UT_LIST_GET_FIRST(buf_pool.free));
-
-	while (block != NULL) {
-		ut_ad(block->page.in_free_list);
-		ut_d(block->page.in_free_list = FALSE);
-		ut_ad(!block->page.oldest_modification());
-		ut_ad(!block->page.in_LRU_list);
-		ut_a(!block->page.in_file());
-		UT_LIST_REMOVE(buf_pool.free, &block->page);
-
-		if (!buf_pool.is_shrinking()
-		    || UT_LIST_GET_LEN(buf_pool.withdraw)
-		    >= buf_pool.withdraw_target
-		    || !buf_pool.will_be_withdrawn(block->page)) {
-			/* No adaptive hash index entries may point to
-			a free block. */
-			assert_block_ahi_empty(block);
-
-			block->page.set_state(buf_page_t::MEMORY);
-			block->page.set_os_used();
-			break;
-		}
-
-		/* This should be withdrawn */
-		UT_LIST_ADD_LAST(buf_pool.withdraw, &block->page);
-		ut_d(block->in_withdraw_list = true);
-
-		block = reinterpret_cast<buf_block_t*>(
-			UT_LIST_GET_FIRST(buf_pool.free));
-	}
-
-	return(block);
-}
-
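buf_LRU_check_size_of_non_data_objects(), rewritten below, compares the blocks
still available for data (free list plus LRU list) against the usable pool
size: below one twentieth the server aborts, below one third it starts the
InnoDB monitor, and while the pool is being shrunk it only emits the LRU
warning. The thresholds in isolation (a sketch; the enum and function are
hypothetical):

#include <cstddef>

enum pool_pressure { POOL_OK, POOL_MOSTLY_NON_DATA, POOL_EXHAUSTED };

// s counts blocks on the free and LRU lists; the remainder of the pool is
// held by non-data objects such as lock heaps or the adaptive hash index.
pool_pressure classify(size_t s, size_t curr_size)
{
  if (s < curr_size / 20)
    return POOL_EXHAUSTED;         // over 95% non-data: fatal error in InnoDB
  if (s < curr_size / 3)
    return POOL_MOSTLY_NON_DATA;   // over 67% non-data: start the monitor
  return POOL_OK;
}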
/******************************************************************//**
Checks how much of buf_pool is occupied by non-data objects like AHI,
lock heaps etc. Depending on the size of non-data objects this
function will either assert or issue a warning and switch on the
status monitor. */
-static void buf_LRU_check_size_of_non_data_objects()
+static void buf_LRU_check_size_of_non_data_objects() noexcept
 {
 	mysql_mutex_assert_owner(&buf_pool.mutex);

-	if (recv_recovery_is_on() || buf_pool.n_chunks_new != buf_pool.n_chunks)
+	if (recv_recovery_is_on())
 		return;

-	const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU);
+	const size_t curr_size{buf_pool.usable_size()};

-	if (s < buf_pool.curr_size / 20)
-		ib::fatal() << "Over 95 percent of the buffer pool is"
-			" occupied by lock heaps"
+	auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU);
+
+	if (s >= curr_size / 20);
+	else if (buf_pool.is_shrinking())
+		buf_pool.LRU_warn();
+	else
+	{
+		sql_print_error("[FATAL] InnoDB: Over 95 percent of the buffer pool is"
+				" occupied by lock heaps"
 #ifdef BTR_CUR_HASH_ADAPT
-			" or the adaptive hash index"
+				" or the adaptive hash index"
 #endif /* BTR_CUR_HASH_ADAPT */
-			"! Check that your transactions do not set too many"
-			" row locks, or review if innodb_buffer_pool_size="
-			<< (buf_pool.curr_size >> (20U - srv_page_size_shift))
-			<< "M could be bigger.";
+				"! Check that your transactions do not set too many"
+				" row locks, or review if innodb_buffer_pool_size=%zuM"
+				" could be bigger",
+				curr_size >> (20 - srv_page_size_shift));
+		abort();
+	}

-	if (s < buf_pool.curr_size / 3) {
+	if (s < curr_size / 3) {
 		if (!buf_lru_switched_on_innodb_mon && srv_monitor_timer) {
 			/* Over 67 % of the buffer pool is occupied by lock
 			heaps or the adaptive hash index. This may be a memory
 			leak! */
-			ib::warn() << "Over 67 percent of the buffer pool is"
-				" occupied by lock heaps"
+			sql_print_warning("InnoDB: Over 67 percent of the buffer pool is"
+					  " occupied by lock heaps"
 #ifdef BTR_CUR_HASH_ADAPT
-				" or the adaptive hash index"
+					  " or the adaptive hash index"
 #endif /* BTR_CUR_HASH_ADAPT */
-				"! Check that your transactions do not set too many row locks."
-				" innodb_buffer_pool_size="
-				<< (buf_pool.curr_size >> (20U - srv_page_size_shift))
-				<< "M. Starting the InnoDB Monitor to print diagnostics.";
+					  "! Check that your transactions do not set too many"
+					  " row locks. innodb_buffer_pool_size=%zuM."
+					  " Starting the InnoDB Monitor to print diagnostics.",
+					  curr_size >> (20 - srv_page_size_shift));
+
 			buf_lru_switched_on_innodb_mon= true;
 			srv_print_innodb_monitor= TRUE;
 			srv_monitor_timer_schedule_now();
@@ -389,15 +356,15 @@
 retry:
 	/* If there is a block in the free list, take it */
-	block= buf_LRU_get_free_only();
+	block= buf_pool.allocate();
 	if (block)
 	{
 got_block:
 		const ulint LRU_size= UT_LIST_GET_LEN(buf_pool.LRU);
 		const ulint available= UT_LIST_GET_LEN(buf_pool.free);
-		const ulint scan_depth= buf_pool.LRU_scan_depth / 2;
-		ut_ad(LRU_size <= BUF_LRU_MIN_LEN ||
-		      available >= scan_depth || buf_pool.need_LRU_eviction());
+		const size_t scan_depth{buf_pool.LRU_scan_depth / 2};
+		ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth ||
+		      buf_pool.is_shrinking() || buf_pool.need_LRU_eviction());

 		ut_d(bool signalled = false);

@@ -446,7 +413,7 @@

 	waited= true;

-	while (!(block= buf_LRU_get_free_only()))
+	while (!(block= buf_pool.allocate()))
 	{
 		buf_pool.stat.LRU_waits++;

@@ -811,10 +778,10 @@
 	if (zip || !bpage->zip.data || !bpage->frame) {
 		break;
 	}
+	mysql_mutex_lock(&buf_pool.flush_list_mutex);
relocate_compressed:
 	b = static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof *b));
 	ut_a(b);
-	mysql_mutex_lock(&buf_pool.flush_list_mutex);
 	new (b) buf_page_t(*bpage);
 	b->frame = nullptr;
 	{
@@ -833,7 +800,12 @@
 		hash_lock.unlock();
 		return(false);
 	}
-	goto relocate_compressed;
+	mysql_mutex_lock(&buf_pool.flush_list_mutex);
+	if (bpage->can_relocate()) {
+		goto relocate_compressed;
+	}
+	mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+	goto func_exit;
 }

 mysql_mutex_assert_owner(&buf_pool.mutex);
@@ -872,7 +844,6 @@
 	/* The fields of bpage were copied to b before
*/ - ut_ad(!b->in_zip_hash); ut_ad(b->in_LRU_list); ut_ad(b->in_page_hash); ut_d(b->in_page_hash = false); @@ -988,24 +959,12 @@ if (data != NULL) { block->page.zip.data = NULL; - buf_pool_mutex_exit_forbid(); - ut_ad(block->zip_size()); - buf_buddy_free(data, block->zip_size()); - - buf_pool_mutex_exit_allow(); page_zip_set_size(&block->page.zip, 0); } - if (buf_pool.is_shrinking() - && UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target - && buf_pool.will_be_withdrawn(block->page)) { - /* This should be withdrawn */ - UT_LIST_ADD_LAST( - buf_pool.withdraw, - &block->page); - ut_d(block->in_withdraw_list = true); + if (buf_pool.to_withdraw() && buf_pool.withdraw(block->page)) { } else { UT_LIST_ADD_FIRST(buf_pool.free, &block->page); ut_d(block->page.in_free_list = true); @@ -1106,7 +1065,6 @@ MEM_CHECK_ADDRESSABLE(bpage->zip.data, bpage->zip_size()); } - ut_ad(!bpage->in_zip_hash); buf_pool.page_hash.remove(chain, bpage); page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); @@ -1118,11 +1076,7 @@ ut_ad(!bpage->oldest_modification()); hash_lock.unlock(); - buf_pool_mutex_exit_forbid(); - buf_buddy_free(bpage->zip.data, bpage->zip_size()); - - buf_pool_mutex_exit_allow(); bpage->lock.free(); ut_free(bpage); return false; @@ -1151,12 +1105,7 @@ ut_ad(!bpage->in_free_list); ut_ad(!bpage->oldest_modification()); ut_ad(!bpage->in_LRU_list); - buf_pool_mutex_exit_forbid(); - buf_buddy_free(data, bpage->zip_size()); - - buf_pool_mutex_exit_allow(); - page_zip_set_size(&bpage->zip, 0); } @@ -1327,7 +1276,7 @@ ut_ad(!bpage->frame || reinterpret_cast(bpage) ->in_unzip_LRU_list - == bpage->belongs_to_unzip_LRU()); + == !!bpage->zip.data); if (bpage->is_old()) { const buf_page_t* prev diff -Nru mariadb-10.11.11/storage/innobase/buf/buf0rea.cc mariadb-10.11.13/storage/innobase/buf/buf0rea.cc --- mariadb-10.11.11/storage/innobase/buf/buf0rea.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/buf/buf0rea.cc 2025-05-19 16:14:25.000000000 +0000 @@ -44,7 +44,7 @@ #include "log.h" #include "mariadb_stats.h" -/** If there are buf_pool.curr_size per the number below pending reads, then +/** If there are buf_pool.curr_size() per the number below pending reads, then read-ahead is not done: this is to prevent flooding the buffer pool with i/o-fixed buffer blocks */ #define BUF_READ_AHEAD_PEND_LIMIT 2 @@ -63,7 +63,6 @@ ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked()); ut_ad(w >= &watch[0]); ut_ad(w < &watch[array_elements(watch)]); - ut_ad(!w->in_zip_hash); ut_ad(!w->zip.data); uint32_t s{w->state()}; @@ -372,7 +371,7 @@ return 0; if (os_aio_pending_reads_approx() > - buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) + buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT) return 0; fil_space_t* space= fil_space_t::get(page_id.space()); @@ -525,7 +524,7 @@ return 0; if (os_aio_pending_reads_approx() > - buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) + buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT) return 0; const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area; diff -Nru mariadb-10.11.11/storage/innobase/dict/dict0defrag_bg.cc mariadb-10.11.13/storage/innobase/dict/dict0defrag_bg.cc --- mariadb-10.11.11/storage/innobase/dict/dict0defrag_bg.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/dict/dict0defrag_bg.cc 2025-05-19 16:14:25.000000000 +0000 @@ -196,7 +196,7 @@ ? 
dict_table_find_index_on_id(table, index_id) : nullptr) if (index->is_btree()) dict_stats_save_defrag_stats(index); - dict_table_close(table, false, thd, mdl); + dict_table_close(table, thd, mdl); } } @@ -217,47 +217,17 @@ if (index->is_ibuf()) return DB_SUCCESS; - MDL_ticket *mdl_table= nullptr, *mdl_index= nullptr; - dict_table_t *table_stats= dict_table_open_on_name(TABLE_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (table_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats= dict_acquire_mdl_shared(table_stats, thd, &mdl_table); - dict_sys.unfreeze(); - } - if (!table_stats || strcmp(table_stats->name.m_name, TABLE_STATS_NAME)) - { -release_and_exit: - if (table_stats) - dict_table_close(table_stats, false, thd, mdl_table); + dict_stats stats; + if (stats.open(thd)) return DB_STATS_DO_NOT_EXIST; - } - - dict_table_t *index_stats= dict_table_open_on_name(INDEX_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (index_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats= dict_acquire_mdl_shared(index_stats, thd, &mdl_index); - dict_sys.unfreeze(); - } - if (!index_stats) - goto release_and_exit; - if (strcmp(index_stats->name.m_name, INDEX_STATS_NAME)) - { - dict_table_close(index_stats, false, thd, mdl_index); - goto release_and_exit; - } - trx_t *trx= trx_create(); trx->mysql_thd= thd; trx_start_internal(trx); dberr_t ret= trx->read_only ? DB_READ_ONLY - : lock_table_for_trx(table_stats, trx, LOCK_X); + : lock_table_for_trx(stats.table(), trx, LOCK_X); if (ret == DB_SUCCESS) - ret= lock_table_for_trx(index_stats, trx, LOCK_X); + ret= lock_table_for_trx(stats.index(), trx, LOCK_X); row_mysql_lock_data_dictionary(trx); if (ret == DB_SUCCESS) ret= dict_stats_save_index_stat(index, time(nullptr), "n_pages_freed", @@ -271,31 +241,27 @@ else trx->rollback(); - if (table_stats) - dict_table_close(table_stats, true, thd, mdl_table); - if (index_stats) - dict_table_close(index_stats, true, thd, mdl_index); - row_mysql_unlock_data_dictionary(trx); trx->free(); + stats.close(); return ret; } /**************************************************************//** Gets the number of reserved and used pages in a B-tree. 
-@return number of pages reserved, or ULINT_UNDEFINED if the index -is unavailable */ +@return number of pages reserved +@retval 0 if the index is unavailable */ static -ulint +uint32_t btr_get_size_and_reserved( dict_index_t* index, /*!< in: index */ ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - ulint* used, /*!< out: number of pages used (<= reserved) */ + uint32_t* used, /*!< out: number of pages used (<= reserved) */ mtr_t* mtr) /*!< in/out: mini-transaction where index is s-latched */ { - ulint dummy; + uint32_t dummy; ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK)); ut_a(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE); @@ -304,19 +270,19 @@ || dict_index_is_online_ddl(index) || !index->is_committed() || !index->table->space) { - return(ULINT_UNDEFINED); + return 0; } dberr_t err; buf_block_t* root = btr_root_block_get(index, RW_SX_LATCH, mtr, &err); *used = 0; if (!root) { - return ULINT_UNDEFINED; + return 0; } mtr->x_lock_space(index->table->space); - ulint n = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF + auto n = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF + root->page.frame, used, mtr); if (flag == BTR_TOTAL_SIZE) { n += fseg_n_reserved_pages(*root, @@ -343,59 +309,28 @@ const time_t now= time(nullptr); mtr_t mtr; - ulint n_leaf_pages; + uint32_t n_leaf_pages; mtr.start(); mtr_sx_lock_index(index, &mtr); - ulint n_leaf_reserved= btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES, - &n_leaf_pages, &mtr); + uint32_t n_leaf_reserved= btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES, + &n_leaf_pages, &mtr); mtr.commit(); - if (n_leaf_reserved == ULINT_UNDEFINED) + if (!n_leaf_reserved) return DB_SUCCESS; - THD *thd= current_thd; - MDL_ticket *mdl_table= nullptr, *mdl_index= nullptr; - dict_table_t* table_stats= dict_table_open_on_name(TABLE_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (table_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats= dict_acquire_mdl_shared(table_stats, thd, &mdl_table); - dict_sys.unfreeze(); - } - if (!table_stats || strcmp(table_stats->name.m_name, TABLE_STATS_NAME)) - { -release_and_exit: - if (table_stats) - dict_table_close(table_stats, false, thd, mdl_table); + THD *const thd= current_thd; + dict_stats stats; + if (stats.open(thd)) return DB_STATS_DO_NOT_EXIST; - } - - dict_table_t *index_stats= dict_table_open_on_name(INDEX_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (index_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats= dict_acquire_mdl_shared(index_stats, thd, &mdl_index); - dict_sys.unfreeze(); - } - if (!index_stats) - goto release_and_exit; - - if (strcmp(index_stats->name.m_name, INDEX_STATS_NAME)) - { - dict_table_close(index_stats, false, thd, mdl_index); - goto release_and_exit; - } - trx_t *trx= trx_create(); trx->mysql_thd= thd; trx_start_internal(trx); dberr_t ret= trx->read_only ? 
DB_READ_ONLY - : lock_table_for_trx(table_stats, trx, LOCK_X); + : lock_table_for_trx(stats.table(), trx, LOCK_X); if (ret == DB_SUCCESS) - ret= lock_table_for_trx(index_stats, trx, LOCK_X); + ret= lock_table_for_trx(stats.index(), trx, LOCK_X); row_mysql_lock_data_dictionary(trx); @@ -423,12 +358,9 @@ else trx->rollback(); - if (table_stats) - dict_table_close(table_stats, true, thd, mdl_table); - if (index_stats) - dict_table_close(index_stats, true, thd, mdl_index); row_mysql_unlock_data_dictionary(trx); trx->free(); + stats.close(); return ret; } diff -Nru mariadb-10.11.11/storage/innobase/dict/dict0dict.cc mariadb-10.11.13/storage/innobase/dict/dict0dict.cc --- mariadb-10.11.11/storage/innobase/dict/dict0dict.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/dict/dict0dict.cc 2025-05-19 16:14:25.000000000 +0000 @@ -44,6 +44,7 @@ #include "btr0cur.h" #include "btr0sea.h" #include "buf0buf.h" +#include "buf0flu.h" #include "data0type.h" #include "dict0boot.h" #include "dict0load.h" @@ -195,71 +196,6 @@ return(FALSE); } -/** Decrement the count of open handles */ -void dict_table_close(dict_table_t *table) -{ - if (table->get_ref_count() == 1 && - dict_stats_is_persistent_enabled(table) && - strchr(table->name.m_name, '/')) - { - /* It looks like we are closing the last handle. The user could - have executed FLUSH TABLES in order to have the statistics reloaded - from the InnoDB persistent statistics tables. We must acquire - exclusive dict_sys.latch to prevent a race condition with another - thread concurrently acquiring a handle on the table. */ - dict_sys.lock(SRW_LOCK_CALL); - if (table->release()) - { - table->stats_mutex_lock(); - if (table->get_ref_count() == 0) - dict_stats_deinit(table); - table->stats_mutex_unlock(); - } - dict_sys.unlock(); - } - else - table->release(); -} - -/** Decrements the count of open handles of a table. -@param[in,out] table table -@param[in] dict_locked whether dict_sys.latch is being held -@param[in] thd thread to release MDL -@param[in] mdl metadata lock or NULL if the thread - is a foreground one. */ -void -dict_table_close( - dict_table_t* table, - bool dict_locked, - THD* thd, - MDL_ticket* mdl) -{ - if (!dict_locked) - dict_table_close(table); - else - { - if (table->release() && dict_stats_is_persistent_enabled(table) && - strchr(table->name.m_name, '/')) - { - /* Force persistent stats re-read upon next open of the table so - that FLUSH TABLE can be used to forcibly fetch stats from disk if - they have been manually modified. */ - table->stats_mutex_lock(); - if (table->get_ref_count() == 0) - dict_stats_deinit(table); - table->stats_mutex_unlock(); - } - - ut_ad(dict_lru_validate()); - ut_ad(dict_sys.find(table)); - } - - if (!thd || !mdl); - else if (MDL_context *mdl_context= static_cast - (thd_mdl_context(thd))) - mdl_context->release_lock(mdl); -} - /** Check if the table has a given (non_virtual) column. @param[in] table table object @param[in] col_name column name @@ -586,6 +522,14 @@ return(ULINT_UNDEFINED); } +void mdl_release(THD *thd, MDL_ticket *mdl) noexcept +{ + if (!thd || !mdl); + else if (MDL_context *mdl_context= static_cast + (thd_mdl_context(thd))) + mdl_context->release_lock(mdl); +} + /** Parse the table file name into table name and database name. 
@tparam dict_frozen whether the caller holds dict_sys.latch @param[in,out] db_name database name buffer @@ -694,32 +638,28 @@ MDL_context *mdl_context, MDL_ticket **mdl, dict_table_op_t table_op) { - table_id_t table_id= table->id; char db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1]; char tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1]; size_t db_len, tbl_len; - bool unaccessible= false; if (!table->parse_name(db_buf, tbl_buf, &db_len, &tbl_len)) /* The name of an intermediate table starts with #sql */ return table; retry: - if (!unaccessible && (!table->is_readable() || table->corrupted)) + ut_ad(!trylock == dict_sys.frozen()); + + if (!table->is_readable() || table->corrupted) { if (*mdl) { mdl_context->release_lock(*mdl); *mdl= nullptr; } - unaccessible= true; + return nullptr; } - if (!trylock) - table->release(); - - if (unaccessible) - return nullptr; + const table_id_t table_id{table->id}; if (!trylock) dict_sys.unfreeze(); @@ -748,11 +688,38 @@ } } + size_t db1_len, tbl1_len; +lookup: dict_sys.freeze(SRW_LOCK_CALL); table= dict_sys.find_table(table_id); if (table) - table->acquire(); - if (!table && table_op != DICT_TABLE_OP_OPEN_ONLY_IF_CACHED) + { + if (!table->is_accessible()) + { + table= nullptr; + unlock_and_return_without_mdl: + if (trylock) + dict_sys.unfreeze(); + return_without_mdl: + if (*mdl) + { + mdl_context->release_lock(*mdl); + *mdl= nullptr; + } + return table; + } + + if (trylock) + table->acquire(); + + if (!table->parse_name(db_buf1, tbl_buf1, &db1_len, &tbl1_len)) + { + /* The table was renamed to #sql prefix. + Release MDL (if any) for the old name and return. */ + goto unlock_and_return_without_mdl; + } + } + else if (table_op != DICT_TABLE_OP_OPEN_ONLY_IF_CACHED) { dict_sys.unfreeze(); dict_sys.lock(SRW_LOCK_CALL); @@ -760,33 +727,19 @@ table_op == DICT_TABLE_OP_LOAD_TABLESPACE ? DICT_ERR_IGNORE_RECOVER_LOCK : DICT_ERR_IGNORE_FK_NOKEY); - if (table) - table->acquire(); dict_sys.unlock(); - dict_sys.freeze(SRW_LOCK_CALL); - } - - if (!table || !table->is_accessible()) - { -return_without_mdl: - if (trylock) - dict_sys.unfreeze(); - if (*mdl) - { - mdl_context->release_lock(*mdl); - *mdl= nullptr; - } - return nullptr; - } - - size_t db1_len, tbl1_len; - - if (!table->parse_name(db_buf1, tbl_buf1, &db1_len, &tbl1_len)) - { - /* The table was renamed to #sql prefix. - Release MDL (if any) for the old name and return. */ + /* At this point, the freshly loaded table may already have been evicted. + We must look it up again while holding a shared dict_sys.latch. We keep + trying this until the table is found in the cache or it cannot be found + in the dictionary (because the table has been dropped or rebuilt). */ + if (table) + goto lookup; + if (!trylock) + dict_sys.freeze(SRW_LOCK_CALL); goto return_without_mdl; } + else + goto return_without_mdl; if (*mdl) { @@ -873,6 +826,7 @@ dict_table_op_t table_op, THD *thd, MDL_ticket **mdl) { +retry: if (!dict_locked) dict_sys.freeze(SRW_LOCK_CALL); @@ -880,9 +834,21 @@ if (table) { - table->acquire(); - if (thd && !dict_locked) - table= dict_acquire_mdl_shared(table, thd, mdl, table_op); + if (!dict_locked) + { + if (thd) + { + table= dict_acquire_mdl_shared(table, thd, mdl, table_op); + if (table) + goto acquire; + } + else + acquire: + table->acquire(); + dict_sys.unfreeze(); + } + else + table->acquire(); } else if (table_op != DICT_TABLE_OP_OPEN_ONLY_IF_CACHED) { @@ -895,24 +861,16 @@ table_op == DICT_TABLE_OP_LOAD_TABLESPACE ? 
DICT_ERR_IGNORE_RECOVER_LOCK : DICT_ERR_IGNORE_FK_NOKEY); - if (table) - table->acquire(); if (!dict_locked) { dict_sys.unlock(); - if (table && thd) - { - dict_sys.freeze(SRW_LOCK_CALL); - table= dict_acquire_mdl_shared(table, thd, mdl, table_op); - dict_sys.unfreeze(); - } - return table; + if (table) + goto retry; } + else if (table) + table->acquire(); } - if (!dict_locked) - dict_sys.unfreeze(); - return table; } @@ -975,7 +933,7 @@ UT_LIST_INIT(table_LRU, &dict_table_t::table_LRU); UT_LIST_INIT(table_non_LRU, &dict_table_t::table_LRU); - const ulint hash_size = buf_pool_get_curr_size() + const ulint hash_size = buf_pool.curr_pool_size() / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); table_hash.create(hash_size); @@ -1012,7 +970,10 @@ const ulong threshold= srv_fatal_semaphore_wait_threshold; if (waited >= threshold) + { + buf_pool.print_flush_info(); ib::fatal() << fatal_msg; + } if (waited > threshold / 4) ib::warn() << "A long wait (" << waited @@ -1129,6 +1090,55 @@ DBUG_RETURN(table); } +bool dict_stats::open(THD *thd) noexcept +{ + ut_ad(!mdl_table); + ut_ad(!mdl_index); + ut_ad(!table_stats); + ut_ad(!index_stats); + ut_ad(!mdl_context); + + mdl_context= static_cast(thd_mdl_context(thd)); + if (!mdl_context) + return true; + /* FIXME: use compatible type, and maybe remove this parameter altogether! */ + const double timeout= double(global_system_variables.lock_wait_timeout); + MDL_request request; + MDL_REQUEST_INIT(&request, MDL_key::TABLE, "mysql", "innodb_table_stats", + MDL_SHARED, MDL_EXPLICIT); + if (UNIV_UNLIKELY(mdl_context->acquire_lock(&request, timeout))) + return true; + mdl_table= request.ticket; + MDL_REQUEST_INIT(&request, MDL_key::TABLE, "mysql", "innodb_index_stats", + MDL_SHARED, MDL_EXPLICIT); + if (UNIV_UNLIKELY(mdl_context->acquire_lock(&request, timeout))) + goto release_mdl; + mdl_index= request.ticket; + table_stats= dict_table_open_on_name("mysql/innodb_table_stats", false, + DICT_ERR_IGNORE_NONE); + if (!table_stats) + goto release_mdl; + index_stats= dict_table_open_on_name("mysql/innodb_index_stats", false, + DICT_ERR_IGNORE_NONE); + if (index_stats) + return false; + + table_stats->release(); +release_mdl: + if (mdl_index) + mdl_context->release_lock(mdl_index); + mdl_context->release_lock(mdl_table); + return true; +} + +void dict_stats::close() noexcept +{ + table_stats->release(); + index_stats->release(); + mdl_context->release_lock(mdl_table); + mdl_context->release_lock(mdl_index); +} + /**********************************************************************//** Adds system columns to a table object. */ void @@ -4389,7 +4399,7 @@ table_id_hash.free(); temp_id_hash.free(); - const ulint hash_size = buf_pool_get_curr_size() + const ulint hash_size = buf_pool.curr_pool_size() / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); table_hash.create(hash_size); table_id_hash.create(hash_size); diff -Nru mariadb-10.11.11/storage/innobase/dict/dict0load.cc mariadb-10.11.13/storage/innobase/dict/dict0load.cc --- mariadb-10.11.11/storage/innobase/dict/dict0load.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/dict/dict0load.cc 2025-05-19 16:14:25.000000000 +0000 @@ -2513,10 +2513,12 @@ if (!table->is_readable()) { /* Don't attempt to load the indexes from disk. 
*/ } else if (err == DB_SUCCESS) { + auto i = fk_tables.size(); err = dict_load_foreigns(table->name.m_name, nullptr, 0, true, ignore_err, fk_tables); if (err != DB_SUCCESS) { + fk_tables.erase(fk_tables.begin() + i, fk_tables.end()); ib::warn() << "Load table " << table->name << " failed, the table has missing" " foreign key indexes. Turn off" diff -Nru mariadb-10.11.11/storage/innobase/dict/dict0stats.cc mariadb-10.11.13/storage/innobase/dict/dict0stats.cc --- mariadb-10.11.11/storage/innobase/dict/dict0stats.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/dict/dict0stats.cc 2025-05-19 16:14:25.000000000 +0000 @@ -359,7 +359,7 @@ if (!table) { if (opt_bootstrap) - return DB_TABLE_NOT_FOUND; + return DB_STATS_DO_NOT_EXIST; if (req_schema == &table_stats_schema) { if (innodb_table_stats_not_found_reported) { return DB_STATS_DO_NOT_EXIST; @@ -377,10 +377,10 @@ snprintf(errstr, errstr_sz, "Table %s not found.", req_schema->table_name_sql); - return DB_TABLE_NOT_FOUND; + return DB_STATS_DO_NOT_EXIST; } - if (!table->is_readable() && !table->space) { + if (!table->is_readable() || !table->space) { /* missing tablespace */ snprintf(errstr, errstr_sz, "Tablespace for table %s is missing.", @@ -491,11 +491,8 @@ return DB_SUCCESS; } -/*********************************************************************//** -Checks whether the persistent statistics storage exists and that all -tables have the proper structure. -@return true if exists and all tables are ok */ -static bool dict_stats_persistent_storage_check(bool dict_already_locked) +dict_stats_schema_check +dict_stats_persistent_storage_check(bool dict_already_locked) noexcept { char errstr[512]; dberr_t ret; @@ -521,14 +518,14 @@ switch (ret) { case DB_SUCCESS: - return true; + return SCHEMA_OK; + case DB_STATS_DO_NOT_EXIST: + return SCHEMA_NOT_EXIST; default: if (!opt_bootstrap) { - ib::error() << errstr; + sql_print_error("InnoDB: %s", errstr); } - /* fall through */ - case DB_STATS_DO_NOT_EXIST: - return false; + return SCHEMA_INVALID; } } @@ -544,13 +541,16 @@ { ut_ad(dict_sys.locked()); - if (!dict_stats_persistent_storage_check(true)) - { - pars_info_free(pinfo); - return DB_STATS_DO_NOT_EXIST; + switch (dict_stats_persistent_storage_check(true)) { + case SCHEMA_OK: + return que_eval_sql(pinfo, sql, trx); + case SCHEMA_INVALID: + case SCHEMA_NOT_EXIST: + break; } - return que_eval_sql(pinfo, sql, trx); + pars_info_free(pinfo); + return DB_STATS_DO_NOT_EXIST; } @@ -599,7 +599,7 @@ table->stat_clustered_index_size = 1; /* 1 page for each index, not counting the clustered */ table->stat_sum_of_other_index_sizes - = UT_LIST_GET_LEN(table->indexes) - 1; + = uint32_t(UT_LIST_GET_LEN(table->indexes) - 1); table->stat_modified_counter = 0; dict_index_t* index; @@ -617,7 +617,7 @@ dict_stats_empty_index(index, empty_defrag_stats); } - table->stat_initialized = TRUE; + table->stat = table->stat | dict_table_t::STATS_INITIALIZED; table->stats_mutex_unlock(); } @@ -658,16 +658,10 @@ /*==========================*/ const dict_table_t* table) /*!< in: table */ { - ut_a(table->stat_initialized); - MEM_CHECK_DEFINED(&table->stats_last_recalc, sizeof table->stats_last_recalc); - MEM_CHECK_DEFINED(&table->stat_persistent, - sizeof table->stat_persistent); - - MEM_CHECK_DEFINED(&table->stats_auto_recalc, - sizeof table->stats_auto_recalc); + MEM_CHECK_DEFINED(&table->stat, sizeof table->stat); MEM_CHECK_DEFINED(&table->stats_sample_pages, sizeof table->stats_sample_pages); @@ -844,8 +838,8 @@ ulint n_cols; ib_uint64_t* n_diff; 
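
dict_stats_persistent_storage_check() above now distinguishes a missing statistics schema from a corrupted one instead of collapsing both into false. A compilable sketch of the resulting dispatch, mirroring the switch in dict_stats_exec_sql(); the stub's return value is arbitrary:

    #include <cstdio>

    enum dict_stats_schema_check { SCHEMA_OK, SCHEMA_NOT_EXIST, SCHEMA_INVALID };

    // stand-in for dict_stats_persistent_storage_check(bool)
    static dict_stats_schema_check storage_check() { return SCHEMA_NOT_EXIST; }

    int main()
    {
      switch (storage_check()) {
      case SCHEMA_OK:                 // schema present and sane: run the SQL
        std::puts("que_eval_sql path");
        return 0;
      case SCHEMA_NOT_EXIST:          // stats tables absent: quiet fallback
      case SCHEMA_INVALID:            // stats tables malformed: already logged
        break;
      }
      std::puts("free pinfo, return DB_STATS_DO_NOT_EXIST");
      return 0;
    }
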
ib_uint64_t* n_not_null; - ibool stats_null_not_equal; - uintmax_t n_sample_pages=1; /* number of pages to sample */ + bool stats_null_not_equal; + uint32_t n_sample_pages=1; /* number of pages to sample */ ulint not_empty_flag = 0; ulint total_external_size = 0; uintmax_t add_on; @@ -883,11 +877,11 @@ case SRV_STATS_NULLS_UNEQUAL: /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL case, we will treat NULLs as unequal value */ - stats_null_not_equal = TRUE; + stats_null_not_equal = true; break; case SRV_STATS_NULLS_EQUAL: - stats_null_not_equal = FALSE; + stats_null_not_equal = false; break; default: @@ -938,19 +932,21 @@ so taking all case2 paths is I, our expression is: n_pages = S < I? min(I,L) : I - */ - if (index->stat_index_size > 1) { - n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) - ? ut_min(index->stat_index_size, - static_cast( - log2(double(index->stat_index_size)) - * double(srv_stats_transient_sample_pages))) - : index->stat_index_size; + */ + if (uint32_t I = index->stat_index_size) { + const uint32_t S{srv_stats_transient_sample_pages}; + n_sample_pages = S < I + ? std::min(I, + uint32_t(log2(double(I)) + * double(S))) + : I; } } /* Sanity check */ - ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size <= 1 ? 1 : index->stat_index_size)); + ut_ad(n_sample_pages); + ut_ad(n_sample_pages <= (index->stat_index_size <= 1 + ? 1 : index->stat_index_size)); /* We sample some pages in the index to get an estimate */ btr_cur_t cursor; @@ -1169,7 +1165,7 @@ mtr.x_lock_space(index->table->space); - ulint dummy, size; + uint32_t dummy, size; index->stat_index_size = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF @@ -1209,24 +1205,12 @@ return err; } -/*********************************************************************//** -Calculates new estimates for table and index statistics. This function -is relatively quick and is used to calculate transient statistics that -are not saved on disk. -This was the only way to calculate statistics before the -Persistent Statistics feature was introduced. -@return error code -@retval DB_SUCCESS_LOCKED REC if the table under bulk insert operation */ -static -dberr_t -dict_stats_update_transient( -/*========================*/ - dict_table_t* table) /*!< in/out: table */ +dberr_t dict_stats_update_transient(dict_table_t *table) noexcept { ut_ad(!table->stats_mutex_is_owner()); dict_index_t* index; - ulint sum_of_index_sizes = 0; + uint32_t sum_of_index_sizes = 0; dberr_t err = DB_SUCCESS; /* Find out the sizes of the indexes and how many different values @@ -1234,17 +1218,16 @@ index = dict_table_get_first_index(table); - if (!table->space) { - /* Nothing to do. */ -empty_table: + if (!index || !table->space) { dict_stats_empty_table(table, true); - return err; - } else if (index == NULL) { - /* Table definition is corrupt */ + return DB_SUCCESS; + } - ib::warn() << "Table " << table->name - << " has no indexes. 
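
A compilable rendering of the sampling bound derived above, n_pages = S < I ? min(I, log2(I)*S) : I; the default srv_stats_transient_sample_pages = 8 is assumed here:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    int main()
    {
      const uint32_t S = 8;               // srv_stats_transient_sample_pages
      for (uint32_t I : {1U, 6U, 10000U}) // index->stat_index_size in pages
      {
        uint32_t n = 1;
        if (I)
          n = S < I
            ? std::min(I, uint32_t(std::log2(double(I)) * double(S)))
            : I;
        std::printf("index_size=%5u pages -> sample %3u leaf pages\n", I, n);
      }
      // 1 -> 1 and 6 -> 6 (scan the whole index); 10000 -> 106,
      // since log2(10000) * 8 is about 106.3
    }
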
Cannot calculate statistics."; - goto empty_table; + if (trx_id_t bulk_trx_id = table->bulk_trx_id) { + if (trx_sys.find(nullptr, bulk_trx_id, false)) { + dict_stats_empty_table(table, false); + return DB_SUCCESS_LOCKED_REC; + } } for (; index != NULL; index = dict_table_get_next_index(index)) { @@ -1285,7 +1268,7 @@ table->stat_modified_counter = 0; - table->stat_initialized = TRUE; + table->stat = table->stat | dict_table_t::STATS_INITIALIZED; table->stats_mutex_unlock(); @@ -2225,8 +2208,8 @@ struct index_stats_t { std::vector stats; - ulint index_size; - ulint n_leaf_pages; + uint32_t index_size; + uint32_t n_leaf_pages; index_stats_t(ulint n_uniq) : index_size(1), n_leaf_pages(1) { @@ -2365,7 +2348,7 @@ uint16_t root_level = btr_page_get_level(root->page.frame); mtr.x_lock_space(index->table->space); - ulint dummy, size; + uint32_t dummy, size; result.index_size = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF + root->page.frame, &size, &mtr) @@ -2635,17 +2618,7 @@ DBUG_RETURN(result); } -/*********************************************************************//** -Calculates new estimates for table and index statistics. This function -is relatively slow and is used to calculate persistent statistics that -will be saved on disk. -@return DB_SUCCESS or error code -@retval DB_SUCCESS_LOCKED_REC if the table under bulk insert operation */ -static -dberr_t -dict_stats_update_persistent( -/*=========================*/ - dict_table_t* table) /*!< in/out: table */ +dberr_t dict_stats_update_persistent(dict_table_t *table) noexcept { dict_index_t* index; @@ -2653,6 +2626,13 @@ DEBUG_SYNC_C("dict_stats_update_persistent"); + if (trx_id_t bulk_trx_id = table->bulk_trx_id) { + if (trx_sys.find(nullptr, bulk_trx_id, false)) { + dict_stats_empty_table(table, false); + return DB_SUCCESS_LOCKED_REC; + } + } + /* analyze the clustered index first */ index = dict_table_get_first_index(table); @@ -2742,7 +2722,7 @@ table->stat_modified_counter = 0; - table->stat_initialized = TRUE; + table->stat = table->stat | dict_table_t::STATS_INITIALIZED; dict_stats_assert_initialized(table); @@ -2751,6 +2731,18 @@ return(DB_SUCCESS); } +dberr_t dict_stats_update_persistent_try(dict_table_t *table) +{ + if (table->stats_is_persistent() && + dict_stats_persistent_storage_check(false) == SCHEMA_OK) + { + if (dberr_t err= dict_stats_update_persistent(table)) + return err; + return dict_stats_save(table); + } + return DB_SUCCESS; +} + #include "mysql_com.h" /** Save an individual index's statistic into the persistent statistics storage. @@ -2829,14 +2821,14 @@ "END;", trx); if (UNIV_UNLIKELY(ret != DB_SUCCESS)) { - if (innodb_index_stats_not_found == false && - index->stats_error_printed == false) { + if (innodb_index_stats_not_found == false + && !index->table->stats_error_printed) { + index->table->stats_error_printed = true; ib::error() << "Cannot save index statistics for table " << index->table->name << ", index " << index->name << ", stat name \"" << stat_name << "\": " << ret; - index->stats_error_printed = true; } } @@ -2878,27 +2870,29 @@ return err; } -/** Save the table's statistics into the persistent statistics storage. -@param[in] table table whose stats to save -@param[in] only_for_index if this is non-NULL, then stats for indexes -that are not equal to it will not be saved, if NULL, then all indexes' stats -are saved +/** Save the persistent statistics of a table or an index. 
+@param table table whose stats to save +@param only_for_index the index ID to save statistics for (0=all) @return DB_SUCCESS or error code */ -static -dberr_t -dict_stats_save( - dict_table_t* table, - const index_id_t* only_for_index) +dberr_t dict_stats_save(dict_table_t* table, index_id_t index_id) { pars_info_t* pinfo; char db_utf8[MAX_DB_UTF8_LEN]; char table_utf8[MAX_TABLE_UTF8_LEN]; + THD* const thd = current_thd; #ifdef ENABLED_DEBUG_SYNC DBUG_EXECUTE_IF("dict_stats_save_exit_notify", + SCOPE_EXIT([thd] { + debug_sync_set_action(thd, + STRING_WITH_LEN("now SIGNAL dict_stats_save_finished")); + }); + ); + DBUG_EXECUTE_IF("dict_stats_save_exit_notify_and_wait", SCOPE_EXIT([] { debug_sync_set_action(current_thd, - STRING_WITH_LEN("now SIGNAL dict_stats_save_finished")); + STRING_WITH_LEN("now SIGNAL dict_stats_save_finished" + " WAIT_FOR dict_stats_save_unblock")); }); ); #endif /* ENABLED_DEBUG_SYNC */ @@ -2911,41 +2905,10 @@ return (dict_stats_report_error(table)); } - THD* thd = current_thd; - MDL_ticket *mdl_table = nullptr, *mdl_index = nullptr; - dict_table_t* table_stats = dict_table_open_on_name( - TABLE_STATS_NAME, false, DICT_ERR_IGNORE_NONE); - if (table_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats = dict_acquire_mdl_shared(table_stats, thd, - &mdl_table); - dict_sys.unfreeze(); - } - if (!table_stats - || strcmp(table_stats->name.m_name, TABLE_STATS_NAME)) { -release_and_exit: - if (table_stats) { - dict_table_close(table_stats, false, thd, mdl_table); - } + dict_stats stats; + if (stats.open(thd)) { return DB_STATS_DO_NOT_EXIST; } - - dict_table_t* index_stats = dict_table_open_on_name( - INDEX_STATS_NAME, false, DICT_ERR_IGNORE_NONE); - if (index_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats = dict_acquire_mdl_shared(index_stats, thd, - &mdl_index); - dict_sys.unfreeze(); - } - if (!index_stats) { - goto release_and_exit; - } - if (strcmp(index_stats->name.m_name, INDEX_STATS_NAME)) { - dict_table_close(index_stats, false, thd, mdl_index); - goto release_and_exit; - } - dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); const time_t now = time(NULL); @@ -2954,9 +2917,9 @@ trx_start_internal(trx); dberr_t ret = trx->read_only ? 
DB_READ_ONLY - : lock_table_for_trx(table_stats, trx, LOCK_X); + : lock_table_for_trx(stats.table(), trx, LOCK_X); if (ret == DB_SUCCESS) { - ret = lock_table_for_trx(index_stats, trx, LOCK_X); + ret = lock_table_for_trx(stats.index(), trx, LOCK_X); } if (ret != DB_SUCCESS) { if (trx->state != TRX_STATE_NOT_STARTED) { @@ -3002,8 +2965,14 @@ "END;", trx); if (UNIV_UNLIKELY(ret != DB_SUCCESS)) { - ib::error() << "Cannot save table statistics for table " - << table->name << ": " << ret; + sql_print_error("InnoDB: Cannot save table statistics for" +#ifdef EMBEDDED_LIBRARY + " table %.*s.%s: %s", +#else + " table %`.*s.%`s: %s", +#endif + int(table->name.dblen()), table->name.m_name, + table->name.basename(), ut_strerr(ret)); rollback_and_exit: trx->rollback(); free_and_exit: @@ -3011,8 +2980,7 @@ dict_sys.unlock(); unlocked_free_and_exit: trx->free(); - dict_table_close(table_stats, false, thd, mdl_table); - dict_table_close(index_stats, false, thd, mdl_index); + stats.close(); return ret; } @@ -3046,7 +3014,7 @@ index = it->second; - if (only_for_index != NULL && index->id != *only_for_index) { + if (index_id != 0 && index->id != index_id) { continue; } @@ -3116,6 +3084,14 @@ goto free_and_exit; } +void dict_stats_empty_table_and_save(dict_table_t *table) +{ + dict_stats_empty_table(table, true); + if (table->stats_is_persistent() && + dict_stats_persistent_storage_check(false) == SCHEMA_OK) + dict_stats_save(table); +} + /*********************************************************************//** Called for the row that is selected by SELECT ... FROM mysql.innodb_table_stats WHERE table='...' @@ -3164,8 +3140,7 @@ ut_a(len == 8); table->stat_clustered_index_size - = std::max( - (ulint) mach_read_from_8(data), 1); + = std::max(mach_read_from_4(data + 4), 1U); break; } @@ -3174,18 +3149,9 @@ ut_a(dtype_get_mtype(type) == DATA_INT); ut_a(len == 8); - ulint stat_other_idx_size - = (ulint) mach_read_from_8(data); - if (!stat_other_idx_size - && UT_LIST_GET_LEN(table->indexes) > 1) { - stat_other_idx_size - = UT_LIST_GET_LEN(table->indexes) - 1; - } - table->stat_sum_of_other_index_sizes - = std::max( - (ulint) mach_read_from_8(data), - UT_LIST_GET_LEN(table->indexes) - 1); - + table->stat_sum_of_other_index_sizes = std::max( + mach_read_from_4(data + 4), + uint32_t(UT_LIST_GET_LEN(table->indexes) - 1)); break; } default: @@ -3370,14 +3336,12 @@ if (stat_name_len == 4 /* strlen("size") */ && strncasecmp("size", stat_name, stat_name_len) == 0) { - index->stat_index_size - = std::max((ulint) stat_value, 1); + index->stat_index_size = std::max(uint32_t(stat_value), 1U); arg->stats_were_modified = true; } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */ && strncasecmp("n_leaf_pages", stat_name, stat_name_len) == 0) { - index->stat_n_leaf_pages - = std::max((ulint) stat_value, 1); + index->stat_n_leaf_pages = std::max(uint32_t(stat_value), 1U); arg->stats_were_modified = true; } else if (stat_name_len == 12 /* strlen("n_page_split") */ && strncasecmp("n_page_split", stat_name, stat_name_len) @@ -3477,19 +3441,11 @@ return(TRUE); } -/*********************************************************************//** -Read table's statistics from the persistent statistics storage. -@return DB_SUCCESS or error code */ -static -dberr_t -dict_stats_fetch_from_ps( -/*=====================*/ - dict_table_t* table) /*!< in/out: table */ +/** Read the stored persistent statistics of a table. 
*/ +dberr_t dict_stats_fetch_from_ps(dict_table_t *table) { index_fetch_t index_fetch_arg; - trx_t* trx; pars_info_t* pinfo; - dberr_t ret; char db_utf8[MAX_DB_UTF8_LEN]; char table_utf8[MAX_TABLE_UTF8_LEN]; @@ -3499,49 +3455,16 @@ stats. */ dict_stats_empty_table(table, true); - THD* thd = current_thd; - MDL_ticket *mdl_table = nullptr, *mdl_index = nullptr; - dict_table_t* table_stats = dict_table_open_on_name( - TABLE_STATS_NAME, false, DICT_ERR_IGNORE_NONE); - if (table_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats = dict_acquire_mdl_shared(table_stats, thd, - &mdl_table); - dict_sys.unfreeze(); - } - if (!table_stats - || strcmp(table_stats->name.m_name, TABLE_STATS_NAME)) { -release_and_exit: - if (table_stats) { - dict_table_close(table_stats, false, thd, mdl_table); - } + THD* const thd = current_thd; + dict_stats stats; + if (stats.open(thd)) { return DB_STATS_DO_NOT_EXIST; } - dict_table_t* index_stats = dict_table_open_on_name( - INDEX_STATS_NAME, false, DICT_ERR_IGNORE_NONE); - if (index_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats = dict_acquire_mdl_shared(index_stats, thd, - &mdl_index); - dict_sys.unfreeze(); - } - if (!index_stats) { - goto release_and_exit; - } - if (strcmp(index_stats->name.m_name, INDEX_STATS_NAME)) { - dict_table_close(index_stats, false, thd, mdl_index); - goto release_and_exit; - } - #ifdef ENABLED_DEBUG_SYNC DEBUG_SYNC(thd, "dict_stats_mdl_acquired"); #endif /* ENABLED_DEBUG_SYNC */ - trx = trx_create(); - - trx_start_internal_read_only(trx); - dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8), table_utf8, sizeof(table_utf8)); @@ -3562,76 +3485,77 @@ "fetch_index_stats_step", dict_stats_fetch_index_stats_step, &index_fetch_arg); - dict_sys.lock(SRW_LOCK_CALL); /* FIXME: remove this */ - ret = que_eval_sql(pinfo, - "PROCEDURE FETCH_STATS () IS\n" - "found INT;\n" - "DECLARE FUNCTION fetch_table_stats_step;\n" - "DECLARE FUNCTION fetch_index_stats_step;\n" - "DECLARE CURSOR table_stats_cur IS\n" - " SELECT\n" - /* if you change the selected fields, be - sure to adjust - dict_stats_fetch_table_stats_step() */ - " n_rows,\n" - " clustered_index_size,\n" - " sum_of_other_index_sizes\n" - " FROM \"" TABLE_STATS_NAME "\"\n" - " WHERE\n" - " database_name = :database_name AND\n" - " table_name = :table_name;\n" - "DECLARE CURSOR index_stats_cur IS\n" - " SELECT\n" - /* if you change the selected fields, be - sure to adjust - dict_stats_fetch_index_stats_step() */ - " index_name,\n" - " stat_name,\n" - " stat_value,\n" - " sample_size\n" - " FROM \"" INDEX_STATS_NAME "\"\n" - " WHERE\n" - " database_name = :database_name AND\n" - " table_name = :table_name;\n" - - "BEGIN\n" - - "OPEN table_stats_cur;\n" - "FETCH table_stats_cur INTO\n" - " fetch_table_stats_step();\n" - "IF (SQL % NOTFOUND) THEN\n" - " CLOSE table_stats_cur;\n" - " RETURN;\n" - "END IF;\n" - "CLOSE table_stats_cur;\n" - - "OPEN index_stats_cur;\n" - "found := 1;\n" - "WHILE found = 1 LOOP\n" - " FETCH index_stats_cur INTO\n" - " fetch_index_stats_step();\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE index_stats_cur;\n" + dict_sys.lock(SRW_LOCK_CALL); + que_t* graph = pars_sql( + pinfo, + "PROCEDURE FETCH_STATS () IS\n" + "found INT;\n" + "DECLARE FUNCTION fetch_table_stats_step;\n" + "DECLARE FUNCTION fetch_index_stats_step;\n" + "DECLARE CURSOR table_stats_cur IS\n" + " SELECT\n" + /* if you change the selected fields, be + sure to adjust + dict_stats_fetch_table_stats_step() */ + " n_rows,\n" + " 
clustered_index_size,\n" + " sum_of_other_index_sizes\n" + " FROM \"" TABLE_STATS_NAME "\"\n" + " WHERE\n" + " database_name = :database_name AND\n" + " table_name = :table_name;\n" + "DECLARE CURSOR index_stats_cur IS\n" + " SELECT\n" + /* if you change the selected fields, be + sure to adjust + dict_stats_fetch_index_stats_step() */ + " index_name,\n" + " stat_name,\n" + " stat_value,\n" + " sample_size\n" + " FROM \"" INDEX_STATS_NAME "\"\n" + " WHERE\n" + " database_name = :database_name AND\n" + " table_name = :table_name;\n" + + "BEGIN\n" + + "OPEN table_stats_cur;\n" + "FETCH table_stats_cur INTO\n" + " fetch_table_stats_step();\n" + "IF (SQL % NOTFOUND) THEN\n" + " CLOSE table_stats_cur;\n" + " RETURN;\n" + "END IF;\n" + "CLOSE table_stats_cur;\n" + + "OPEN index_stats_cur;\n" + "found := 1;\n" + "WHILE found = 1 LOOP\n" + " FETCH index_stats_cur INTO\n" + " fetch_index_stats_step();\n" + " IF (SQL % NOTFOUND) THEN\n" + " found := 0;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE index_stats_cur;\n" - "END;", trx); - /* pinfo is freed by que_eval_sql() */ + "END;"); dict_sys.unlock(); - dict_table_close(table_stats, false, thd, mdl_table); - dict_table_close(index_stats, false, thd, mdl_index); + trx_t* trx = trx_create(); + trx->graph = nullptr; + graph->trx = trx; + trx_start_internal_read_only(trx); + que_run_threads(que_fork_start_command(graph)); + que_graph_free(graph); trx_commit_for_mysql(trx); - + dberr_t ret = index_fetch_arg.stats_were_modified + ? trx->error_state : DB_STATS_DO_NOT_EXIST; trx->free(); - - if (!index_fetch_arg.stats_were_modified) { - return(DB_STATS_DO_NOT_EXIST); - } - - return(ret); + stats.close(); + return ret; } /*********************************************************************//** @@ -3641,250 +3565,46 @@ /*========================*/ dict_index_t* index) /*!< in/out: index */ { - DBUG_ENTER("dict_stats_update_for_index"); - - if (dict_stats_is_persistent_enabled(index->table)) { - - if (dict_stats_persistent_storage_check(false)) { - index_stats_t stats = dict_stats_analyze_index(index); - index->table->stats_mutex_lock(); - index->stat_index_size = stats.index_size; - index->stat_n_leaf_pages = stats.n_leaf_pages; - for (size_t i = 0; i < stats.stats.size(); ++i) { - index->stat_n_diff_key_vals[i] - = stats.stats[i].n_diff_key_vals; - index->stat_n_sample_sizes[i] - = stats.stats[i].n_sample_sizes; - index->stat_n_non_null_key_vals[i] - = stats.stats[i].n_non_null_key_vals; - } - index->table->stat_sum_of_other_index_sizes - += index->stat_index_size; - index->table->stats_mutex_unlock(); - - dict_stats_save(index->table, &index->id); - DBUG_VOID_RETURN; - } - /* else */ - - if (innodb_index_stats_not_found == false && - index->stats_error_printed == false) { - /* Fall back to transient stats since the persistent - storage is not present or is corrupted */ - - ib::info() << "Recalculation of persistent statistics" - " requested for table " << index->table->name - << " index " << index->name - << " but the required" - " persistent statistics storage is not present or is" - " corrupted. Using transient stats instead."; - index->stats_error_printed = false; - } - } - - dict_stats_update_transient_for_index(index); - - DBUG_VOID_RETURN; -} - -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. 
-@return DB_SUCCESS or error code -@retval DB_SUCCESS_LOCKED_REC if the table under bulk insert operation */ -dberr_t -dict_stats_update( -/*==============*/ - dict_table_t* table, /*!< in/out: table */ - dict_stats_upd_option_t stats_upd_option) - /*!< in: whether to (re) calc - the stats or to fetch them from - the persistent statistics - storage */ -{ - ut_ad(!table->stats_mutex_is_owner()); - - if (!table->is_readable()) { - return (dict_stats_report_error(table)); - } else if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { - /* If we have set a high innodb_force_recovery level, do - not calculate statistics, as a badly corrupted index can - cause a crash in it. */ - dict_stats_empty_table(table, false); - return(DB_SUCCESS); - } - - if (trx_id_t bulk_trx_id = table->bulk_trx_id) { - if (trx_sys.find(nullptr, bulk_trx_id, false)) { - dict_stats_empty_table(table, false); - return DB_SUCCESS_LOCKED_REC; - } - } - - switch (stats_upd_option) { - case DICT_STATS_RECALC_PERSISTENT: - - if (srv_read_only_mode) { - goto transient; - } - - /* Persistent recalculation requested, called from - 1) ANALYZE TABLE, or - 2) the auto recalculation background thread, or - 3) open table if stats do not exist on disk and auto recalc - is enabled */ - - /* InnoDB internal tables (e.g. SYS_TABLES) cannot have - persistent stats enabled */ - ut_a(strchr(table->name.m_name, '/') != NULL); - - /* check if the persistent statistics storage exists - before calling the potentially slow function - dict_stats_update_persistent(); that is a - prerequisite for dict_stats_save() succeeding */ - if (dict_stats_persistent_storage_check(false)) { - - dberr_t err; - - err = dict_stats_update_persistent(table); - - if (err != DB_SUCCESS) { - return(err); - } - - err = dict_stats_save(table, NULL); - - return(err); - } + dict_table_t *const table= index->table; + ut_ad(table->stat_initialized()); - /* Fall back to transient stats since the persistent - storage is not present or is corrupted */ - - if (innodb_table_stats_not_found == false && - table->stats_error_printed == false) { - ib::warn() << "Recalculation of persistent statistics" - " requested for table " - << table->name - << " but the required persistent" - " statistics storage is not present or is corrupted." - " Using transient stats instead."; - table->stats_error_printed = true; - } - - goto transient; - - case DICT_STATS_RECALC_TRANSIENT: - - goto transient; - - case DICT_STATS_EMPTY_TABLE: - - dict_stats_empty_table(table, true); - - /* If table is using persistent stats, - then save the stats on disk */ - - if (dict_stats_is_persistent_enabled(table)) { - - if (dict_stats_persistent_storage_check(false)) { - - return(dict_stats_save(table, NULL)); - } - - return(DB_STATS_DO_NOT_EXIST); - } - - return(DB_SUCCESS); - - case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY: - - /* fetch requested, either fetch from persistent statistics - storage or use the old method */ - - if (table->stat_initialized) { - return(DB_SUCCESS); - } - - /* InnoDB internal tables (e.g. 
SYS_TABLES) cannot have - persistent stats enabled */ - ut_a(strchr(table->name.m_name, '/') != NULL); - - if (!dict_stats_persistent_storage_check(false)) { - /* persistent statistics storage does not exist - or is corrupted, calculate the transient stats */ - - if (innodb_table_stats_not_found == false && - table->stats_error_printed == false && - !opt_bootstrap) { - ib::error() << "Fetch of persistent statistics" - " requested for table " - << table->name - << " but the required system tables " - << TABLE_STATS_NAME_PRINT - << " and " << INDEX_STATS_NAME_PRINT - << " are not present or have unexpected" - " structure. Using transient stats instead."; - table->stats_error_printed = true; - } - - goto transient; - } - - dberr_t err = dict_stats_fetch_from_ps(table); - - switch (err) { - case DB_SUCCESS: - return(DB_SUCCESS); - case DB_STATS_DO_NOT_EXIST: - - if (srv_read_only_mode) { - goto transient; - } -#ifdef WITH_WSREP - if (wsrep_thd_skip_locking(current_thd)) { - goto transient; - } + if (table->stats_is_persistent()) + switch (dict_stats_persistent_storage_check(false)) { + case SCHEMA_NOT_EXIST: + break; + case SCHEMA_INVALID: + if (table->stats_error_printed) + break; + table->stats_error_printed= true; + sql_print_information("InnoDB: Recalculation of persistent statistics" +#ifdef EMBEDDED_LIBRARY + " requested for table %.*s.%s index %s but" +#else + " requested for table %`.*s.%`s index %`s but" #endif - if (dict_stats_auto_recalc_is_enabled(table)) { - return(dict_stats_update( - table, - DICT_STATS_RECALC_PERSISTENT)); - } - - ib::info() << "Trying to use table " << table->name - << " which has persistent statistics enabled," - " but auto recalculation turned off and the" - " statistics do not exist in " - TABLE_STATS_NAME_PRINT - " and " INDEX_STATS_NAME_PRINT - ". Please either run \"ANALYZE TABLE " - << table->name << ";\" manually or enable the" - " auto recalculation with \"ALTER TABLE " - << table->name << " STATS_AUTO_RECALC=1;\"." - " InnoDB will now use transient statistics for " - << table->name << "."; - - goto transient; - default: - - if (innodb_table_stats_not_found == false && - table->stats_error_printed == false) { - ib::error() << "Error fetching persistent statistics" - " for table " - << table->name - << " from " TABLE_STATS_NAME_PRINT " and " - INDEX_STATS_NAME_PRINT ": " << err - << ". Using transient stats method instead."; - } - - goto transient; - } - /* no "default:" in order to produce a compilation warning - about unhandled enumeration value */ - } + " the required persistent statistics storage" + " is corrupted. 
Using transient stats instead.", + int(table->name.dblen()), table->name.m_name, + table->name.basename(), index->name()); + break; + case SCHEMA_OK: + index_stats_t stats{dict_stats_analyze_index(index)}; + table->stats_mutex_lock(); + index->stat_index_size = stats.index_size; + index->stat_n_leaf_pages = stats.n_leaf_pages; + for (size_t i = 0; i < stats.stats.size(); ++i) + { + index->stat_n_diff_key_vals[i]= stats.stats[i].n_diff_key_vals; + index->stat_n_sample_sizes[i]= stats.stats[i].n_sample_sizes; + index->stat_n_non_null_key_vals[i]= stats.stats[i].n_non_null_key_vals; + } + table->stat_sum_of_other_index_sizes+= index->stat_index_size; + table->stats_mutex_unlock(); + dict_stats_save(table, index->id); + return; + } -transient: - return dict_stats_update_transient(table); + dict_stats_update_transient_for_index(index); } /** Execute DELETE FROM mysql.innodb_table_stats @@ -4034,7 +3754,7 @@ const char *old_name, const char *new_name, trx_t *trx) { - if (!dict_stats_persistent_storage_check(true)) + if (dict_stats_persistent_storage_check(true) != SCHEMA_OK) return DB_STATS_DO_NOT_EXIST; pars_info_t *pinfo= pars_info_create(); @@ -4170,7 +3890,7 @@ index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE; index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE; - ret = dict_stats_save(&table, NULL); + ret = dict_stats_save(&table); ut_a(ret == DB_SUCCESS); diff -Nru mariadb-10.11.11/storage/innobase/dict/dict0stats_bg.cc mariadb-10.11.13/storage/innobase/dict/dict0stats_bg.cc --- mariadb-10.11.11/storage/innobase/dict/dict0stats_bg.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/dict/dict0stats_bg.cc 2025-05-19 16:14:25.000000000 +0000 @@ -135,7 +135,9 @@ void dict_stats_update_if_needed_func(dict_table_t *table) #endif { - if (UNIV_UNLIKELY(!table->stat_initialized)) { + uint32_t stat{table->stat}; + + if (UNIV_UNLIKELY(!table->stat_initialized(stat))) { /* The table may have been evicted from dict_sys and reloaded internally by InnoDB for FOREIGN KEY processing, but not reloaded by the SQL layer. @@ -154,13 +156,9 @@ ulonglong counter = table->stat_modified_counter++; ulonglong n_rows = dict_table_get_n_rows(table); - if (dict_stats_is_persistent_enabled(table)) { - if (table->name.is_temporary()) { - return; - } - if (counter > n_rows / 10 /* 10% */ - && dict_stats_auto_recalc_is_enabled(table)) { - + if (table->stats_is_persistent(stat)) { + if (table->stats_is_auto_recalc(stat) + && counter > n_rows / 10 && !table->name.is_temporary()) { #ifdef WITH_WSREP /* Do not add table to background statistic calculation if this thread is not a @@ -203,7 +201,7 @@ if (counter > threshold) { /* this will reset table->stat_modified_counter to 0 */ - dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT); + dict_stats_update_transient(table); } } @@ -331,7 +329,7 @@ if (!mdl || !table->is_accessible()) { - dict_table_close(table, false, thd, mdl); + dict_table_close(table, thd, mdl); goto invalid_table_id; } @@ -345,10 +343,10 @@ difftime(time(nullptr), table->stats_last_recalc) >= MIN_RECALC_INTERVAL; const dberr_t err= update_now - ? dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT) + ? 
dict_stats_update_persistent_try(table) : DB_SUCCESS_LOCKED_REC; - dict_table_close(table, false, thd, mdl); + dict_table_close(table, thd, mdl); mysql_mutex_lock(&recalc_pool_mutex); auto i= std::find_if(recalc_pool.begin(), recalc_pool.end(), diff -Nru mariadb-10.11.11/storage/innobase/fsp/fsp0fsp.cc mariadb-10.11.13/storage/innobase/fsp/fsp0fsp.cc --- mariadb-10.11.11/storage/innobase/fsp/fsp0fsp.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/fsp/fsp0fsp.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1644,12 +1644,11 @@ /** Calculate reserved fragment page slots. @param inode file segment index @return number of fragment pages */ -static ulint fseg_get_n_frag_pages(const fseg_inode_t *inode) +static uint32_t fseg_get_n_frag_pages(const fseg_inode_t *inode) noexcept { - ulint i; - ulint count = 0; + uint32_t count = 0; - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { + for (ulint i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { if (FIL_NULL != fseg_get_nth_frag_page_no(inode, i)) { count++; } @@ -1794,21 +1793,24 @@ currently used. @return number of reserved pages */ static -ulint +uint32_t fseg_n_reserved_pages_low( /*======================*/ const fseg_inode_t* inode, /*!< in: segment inode */ - ulint* used) /*!< out: number of pages used (not + uint32_t* used) /*!< out: number of pages used (not more than reserved) */ + noexcept { + const uint32_t extent_size = FSP_EXTENT_SIZE; + *used = mach_read_from_4(inode + FSEG_NOT_FULL_N_USED) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL) + + extent_size * flst_get_len(inode + FSEG_FULL) + fseg_get_n_frag_pages(inode); return fseg_get_n_frag_pages(inode) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL); + + extent_size * flst_get_len(inode + FSEG_FREE) + + extent_size * flst_get_len(inode + FSEG_NOT_FULL) + + extent_size * flst_get_len(inode + FSEG_FULL); } /** Calculate the number of pages reserved by a segment, @@ -1818,9 +1820,9 @@ @param[out] used number of pages that are used (not more than reserved) @param[in,out] mtr mini-transaction @return number of reserved pages */ -ulint fseg_n_reserved_pages(const buf_block_t &block, - const fseg_header_t *header, ulint *used, - mtr_t *mtr) +uint32_t fseg_n_reserved_pages(const buf_block_t &block, + const fseg_header_t *header, uint32_t *used, + mtr_t *mtr) noexcept { ut_ad(page_align(header) == block.page.frame); buf_block_t *iblock; @@ -1845,7 +1847,7 @@ buf_block_t *iblock, fil_space_t *space, uint32_t hint, mtr_t *mtr) { - ulint used; + uint32_t used; ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_d(space->modify_check(*mtr)); @@ -1996,8 +1998,7 @@ dberr_t* err) { ib_id_t seg_id; - ulint used; - ulint reserved; + uint32_t used, reserved; xdes_t* descr; /*!< extent of the hinted page */ uint32_t ret_page; /*!< the allocated page offset, FIL_NULL if could not be allocated */ diff -Nru mariadb-10.11.11/storage/innobase/fts/fts0config.cc mariadb-10.11.13/storage/innobase/fts/fts0config.cc --- mariadb-10.11.11/storage/innobase/fts/fts0config.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/fts/fts0config.cc 2025-05-19 16:14:25.000000000 +0000 @@ -231,7 +231,7 @@ n_rows_updated = trx->undo_no - undo_no; /* Check if we need to do an insert. 
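
The fts0config.cc hunk below tightens an UPDATE-else-INSERT fallback: the updated row count is inferred from the growth of trx->undo_no, which is also zero when the UPDATE fails outright, so the INSERT must additionally check the error code. A minimal model, with dberr_t reduced to two values:

    #include <cstdio>

    enum dberr_t { DB_SUCCESS, DB_ERROR };

    int main()
    {
      unsigned long long undo_no = 7;     // trx->undo_no before the UPDATE
      // Simulate a failed UPDATE: error set, no undo records generated.
      dberr_t error = DB_ERROR;
      unsigned long long trx_undo_no = 7; // trx->undo_no after the UPDATE

      // Rows updated are inferred from undo log growth.
      unsigned long long n_rows_updated = trx_undo_no - undo_no;

      if (error == DB_SUCCESS && n_rows_updated == 0)  // the tightened guard
        std::puts("row missing: do the INSERT");
      else
        std::puts("skip INSERT: UPDATE failed or already matched a row");
    }
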
*/ - if (n_rows_updated == 0) { + if (error == DB_SUCCESS && n_rows_updated == 0) { info = pars_info_create(); pars_info_bind_varchar_literal( diff -Nru mariadb-10.11.11/storage/innobase/fts/fts0fts.cc mariadb-10.11.13/storage/innobase/fts/fts0fts.cc --- mariadb-10.11.11/storage/innobase/fts/fts0fts.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/fts/fts0fts.cc 2025-05-19 16:14:25.000000000 +0000 @@ -37,6 +37,7 @@ #include "fts0plugin.h" #include "dict0stats.h" #include "btr0pcur.h" +#include "log.h" static const ulint FTS_MAX_ID_LEN = 32; @@ -1870,8 +1871,10 @@ } } - ib::warn() << "Failed to create FTS common table " << fts_table_name; - trx->error_state = error; + ut_ad(trx->state == TRX_STATE_NOT_STARTED + || trx->error_state == error); + sql_print_warning("InnoDB: Failed to create FTS common table %s: %s", + fts_table_name, ut_strerr(error)); return NULL; } @@ -2055,8 +2058,10 @@ } } - ib::warn() << "Failed to create FTS index table " << table_name; - trx->error_state = error; + ut_ad(trx->state == TRX_STATE_NOT_STARTED + || trx->error_state == error); + sql_print_warning("InnoDB: Failed to create FTS index table %s: %s", + table_name, ut_strerr(error)); return NULL; } diff -Nru mariadb-10.11.11/storage/innobase/fts/fts0opt.cc mariadb-10.11.13/storage/innobase/fts/fts0opt.cc --- mariadb-10.11.11/storage/innobase/fts/fts0opt.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/fts/fts0opt.cc 2025-05-19 16:14:25.000000000 +0000 @@ -2809,7 +2809,7 @@ std::this_thread::sleep_for(std::chrono::seconds(6));); if (mdl_ticket) - dict_table_close(sync_table, false, fts_opt_thd, mdl_ticket); + dict_table_close(sync_table, fts_opt_thd, mdl_ticket); } /**********************************************************************//** diff -Nru mariadb-10.11.11/storage/innobase/gis/gis0sea.cc mariadb-10.11.13/storage/innobase/gis/gis0sea.cc --- mariadb-10.11.11/storage/innobase/gis/gis0sea.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/gis/gis0sea.cc 2025-05-19 16:14:25.000000000 +0000 @@ -504,10 +504,10 @@ rtr_rec_t rec; rec = rtr_info->matches->matched_recs->back(); rtr_info->matches->matched_recs->pop_back(); + cursor->btr_cur.page_cur.block = rtr_info->matches->block; mysql_mutex_unlock(&rtr_info->matches->rtr_match_mutex); cursor->btr_cur.page_cur.rec = rec.r_rec; - cursor->btr_cur.page_cur.block = rtr_info->matches->block; DEBUG_SYNC_C("rtr_pcur_move_to_next_return"); return(true); @@ -1565,7 +1565,10 @@ if (auto matches = rtr_info->matches) { mysql_mutex_lock(&matches->rtr_match_mutex); - if (matches->block->page.id() == id) { + /* matches->block could be nullptr when cursor + encounters empty table */ + if (rtr_info->matches->block + && matches->block->page.id() == id) { matches->matched_recs->clear(); matches->valid = false; } @@ -2201,6 +2204,15 @@ ut_ad(orig_mode != PAGE_CUR_RTREE_LOCATE); + /* Collect matched records on page */ + offsets = rec_get_offsets( + rec, index, offsets, + index->n_fields, + ULINT_UNDEFINED, &heap); + + mysql_mutex_lock( + &rtr_info->matches->rtr_match_mutex); + if (!match_init) { rtr_init_match( rtr_info->matches, @@ -2208,14 +2220,12 @@ match_init = true; } - /* Collect matched records on page */ - offsets = rec_get_offsets( - rec, index, offsets, - index->n_fields, - ULINT_UNDEFINED, &heap); rtr_leaf_push_match_rec( rec, rtr_info, offsets, page_is_comp(page)); + + mysql_mutex_unlock( + &rtr_info->matches->rtr_match_mutex); } last_match_rec = rec; diff -Nru 
mariadb-10.11.11/storage/innobase/handler/ha_innodb.cc mariadb-10.11.13/storage/innobase/handler/ha_innodb.cc --- mariadb-10.11.11/storage/innobase/handler/ha_innodb.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/handler/ha_innodb.cc 2025-05-19 16:14:25.000000000 +0000 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -154,11 +155,6 @@ #include "wsrep_sst.h" #endif /* WITH_WSREP */ -#ifdef HAVE_URING -/** The Linux kernel version if io_uring() is considered unsafe */ -const char *io_uring_may_be_unsafe; -#endif - #define INSIDE_HA_INNOBASE_CC #define EQ_CURRENT_THD(thd) ((thd) == current_thd) @@ -169,13 +165,9 @@ static const long AUTOINC_NEW_STYLE_LOCKING = 1; static const long AUTOINC_NO_LOCKING = 2; -static constexpr size_t buf_pool_chunk_min_size= 1U << 20; - static ulong innobase_open_files; static long innobase_autoinc_lock_mode; -ulonglong innobase_buffer_pool_size; - /** Percentage of the buffer pool to reserve for 'old' blocks. Connected to buf_LRU_old_ratio. */ static uint innobase_old_blocks_pct; @@ -246,11 +238,11 @@ if (thd_kill_level(thd)) break; /* Adjust for purge_coordinator_state::refresh() */ - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); const lsn_t last= log_sys.last_checkpoint_lsn, max_age= log_sys.max_checkpoint_age; - log_sys.latch.rd_unlock(); const lsn_t lsn= log_sys.get_lsn(); + log_sys.latch.wr_unlock(); if ((lsn - last) / 4 >= max_age / 5) buf_flush_ahead(last + max_age / 5, false); purge_sys.wake_if_not_active(); @@ -1158,7 +1150,7 @@ be rolled back to savepoint */ /** Request notification of log writes */ -static void innodb_log_flush_request(void *cookie); +static void innodb_log_flush_request(void *cookie) noexcept; /** Requests for log flushes */ struct log_flush_request @@ -1330,38 +1322,17 @@ dict_sys.unlock(); - dict_table_t *table_stats, *index_stats; - MDL_ticket *mdl_table= nullptr, *mdl_index= nullptr; - table_stats= dict_table_open_on_name(TABLE_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (table_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats= dict_acquire_mdl_shared(table_stats, - thd, &mdl_table); - dict_sys.unfreeze(); - } - index_stats= dict_table_open_on_name(INDEX_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (index_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats= dict_acquire_mdl_shared(index_stats, - thd, &mdl_index); - dict_sys.unfreeze(); - } - + dict_stats stats; + const bool stats_failed{stats.open(thd)}; trx_start_for_ddl(trx); uint errors= 0; char db[NAME_LEN + 1]; strconvert(&my_charset_filename, namebuf, len, system_charset_info, db, sizeof db, &errors); - if (!errors && table_stats && index_stats && - !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) && - !strcmp(index_stats->name.m_name, INDEX_STATS_NAME) && - lock_table_for_trx(table_stats, trx, LOCK_X) == DB_SUCCESS && - lock_table_for_trx(index_stats, trx, LOCK_X) == DB_SUCCESS) + if (!errors && !stats_failed && + lock_table_for_trx(stats.table(), trx, LOCK_X) == DB_SUCCESS && + lock_table_for_trx(stats.index(), trx, LOCK_X) == DB_SUCCESS) { row_mysql_lock_data_dictionary(trx); if (dict_stats_delete(db, trx)) @@ -1457,19 +1428,16 @@ if (err != DB_SUCCESS) { trx->rollback(); - namebuf[len] = '\0'; - ib::error() << "DROP DATABASE " << namebuf << ": " << err; + sql_print_error("InnoDB: DROP DATABASE %.*s: %s", + int(len), namebuf, ut_strerr(err)); } else trx->commit(); - if (table_stats) - dict_table_close(table_stats, true, thd, mdl_table); - if 
(index_stats) - dict_table_close(index_stats, true, thd, mdl_index); row_mysql_unlock_data_dictionary(trx); - trx->free(); + if (!stats_failed) + stats.close(); if (err == DB_SUCCESS) { @@ -1620,9 +1588,9 @@ if (dict_table_t *table= m_prebuilt ? m_prebuilt->table : nullptr) { if (table->is_readable()) - dict_stats_init(table); + statistics_init(table, true); else - table->stat_initialized= 1; + table->stat.fetch_or(dict_table_t::STATS_INITIALIZED); } } @@ -1932,7 +1900,7 @@ { const trx_id_t trx_id= table->def_trx_id; DBUG_ASSERT(trx_id <= create_id); - dict_table_close(table); + table->release(); DBUG_PRINT("info", ("create_id: %llu trx_id: %" PRIu64, create_id, trx_id)); DBUG_RETURN(create_id != trx_id); } @@ -2978,6 +2946,45 @@ return XAER_NOTA; } +/** Initialize the InnoDB persistent statistics attributes. +@param table InnoDB table +@param table_options MariaDB table options +@param sar the value of STATS_AUTO_RECALC +@param initialized whether the InnoDB statistics were already initialized +@return whether table->stats_sample_pages needs to be initialized */ +static bool innodb_copy_stat_flags(dict_table_t *table, + ulong table_options, + enum_stats_auto_recalc sar, + bool initialized) noexcept +{ + if (table->is_temporary() || table->no_rollback()) + { + table->stat= dict_table_t::STATS_INITIALIZED | + dict_table_t::STATS_PERSISTENT_OFF | dict_table_t::STATS_AUTO_RECALC_OFF; + table->stats_sample_pages= 1; + return false; + } + + static_assert(HA_OPTION_STATS_PERSISTENT == + dict_table_t::STATS_PERSISTENT_ON << 11, ""); + static_assert(HA_OPTION_NO_STATS_PERSISTENT == + dict_table_t::STATS_PERSISTENT_OFF << 11, ""); + uint32_t stat= + uint32_t(table_options & + (HA_OPTION_STATS_PERSISTENT | + HA_OPTION_NO_STATS_PERSISTENT)) >> 11; + static_assert(uint32_t{HA_STATS_AUTO_RECALC_ON} << 3 == + dict_table_t::STATS_AUTO_RECALC_ON, ""); + static_assert(uint32_t{HA_STATS_AUTO_RECALC_OFF} << 3 == + dict_table_t::STATS_AUTO_RECALC_OFF, ""); + static_assert(true == dict_table_t::STATS_INITIALIZED, ""); + stat|= (sar & (HA_STATS_AUTO_RECALC_ON | HA_STATS_AUTO_RECALC_OFF)) << 3 | + uint32_t(initialized); + + table->stat= stat; + return true; +} + /*********************************************************************//** Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object. Those flags are stored in .frm file and end up in the MySQL table object, @@ -2990,29 +2997,9 @@ dict_table_t* innodb_table, /*!< in/out: InnoDB table */ const HA_CREATE_INFO* create_info) /*!< in: create info */ { - ibool ps_on; - ibool ps_off; - - if (innodb_table->is_temporary() - || innodb_table->no_rollback()) { - /* Temp tables do not use persistent stats. 
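
The static_asserts in innodb_copy_stat_flags() above pin the packed dict_table_t::stat bit layout directly to the SQL-layer flags, so the conversion is two shifts and a mask. A numeric sketch; the concrete constants HA_OPTION_STATS_PERSISTENT = 1 << 12, HA_OPTION_NO_STATS_PERSISTENT = 1 << 13 and HA_STATS_AUTO_RECALC_ON = 1 are assumptions of this sketch, not taken from the diff:

    #include <cstdint>
    #include <cstdio>

    enum : uint32_t {
      STATS_INITIALIZED     = 1,        // static_assert(true == STATS_INITIALIZED)
      STATS_PERSISTENT_ON   = 1U << 1,  // HA_OPTION_STATS_PERSISTENT >> 11
      STATS_PERSISTENT_OFF  = 1U << 2,  // HA_OPTION_NO_STATS_PERSISTENT >> 11
      STATS_AUTO_RECALC_ON  = 1U << 3,  // HA_STATS_AUTO_RECALC_ON << 3
      STATS_AUTO_RECALC_OFF = 1U << 4,  // HA_STATS_AUTO_RECALC_OFF << 3
    };

    int main()
    {
      const uint32_t HA_OPTION_STATS_PERSISTENT    = 1U << 12;  // assumed
      const uint32_t HA_OPTION_NO_STATS_PERSISTENT = 1U << 13;  // assumed
      const uint32_t table_options = HA_OPTION_STATS_PERSISTENT;
      const uint32_t sar = 1;           // HA_STATS_AUTO_RECALC_ON (assumed)

      uint32_t stat = (table_options &
                       (HA_OPTION_STATS_PERSISTENT |
                        HA_OPTION_NO_STATS_PERSISTENT)) >> 11;
      stat |= (sar & 3U) << 3;          // fold in STATS_AUTO_RECALC
      std::printf("stat=%#x persistent_on=%u auto_recalc_on=%u\n", stat,
                  stat & STATS_PERSISTENT_ON, stat & STATS_AUTO_RECALC_ON);
      // prints stat=0xa persistent_on=2 auto_recalc_on=8
    }
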
*/ - ps_on = FALSE; - ps_off = TRUE; - } else { - ps_on = create_info->table_options - & HA_OPTION_STATS_PERSISTENT; - ps_off = create_info->table_options - & HA_OPTION_NO_STATS_PERSISTENT; - } - - dict_stats_set_persistent(innodb_table, ps_on, ps_off); - - dict_stats_auto_recalc_set( - innodb_table, - create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON, - create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF); - - innodb_table->stats_sample_pages = create_info->stats_sample_pages; + if (innodb_copy_stat_flags(innodb_table, create_info->table_options, + create_info->stats_auto_recalc, false)) + innodb_table->stats_sample_pages= create_info->stats_sample_pages; } /*********************************************************************//** @@ -3026,28 +3013,10 @@ dict_table_t* innodb_table, /*!< in/out: InnoDB table */ const TABLE_SHARE* table_share) /*!< in: table share */ { - ibool ps_on; - ibool ps_off; - - if (innodb_table->is_temporary()) { - /* Temp tables do not use persistent stats */ - ps_on = FALSE; - ps_off = TRUE; - } else { - ps_on = table_share->db_create_options - & HA_OPTION_STATS_PERSISTENT; - ps_off = table_share->db_create_options - & HA_OPTION_NO_STATS_PERSISTENT; - } - - dict_stats_set_persistent(innodb_table, ps_on, ps_off); - - dict_stats_auto_recalc_set( - innodb_table, - table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON, - table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF); - - innodb_table->stats_sample_pages = table_share->stats_sample_pages; + if (innodb_copy_stat_flags(innodb_table, table_share->db_create_options, + table_share->stats_auto_recalc, + innodb_table->stat_initialized())) + innodb_table->stats_sample_pages= table_share->stats_sample_pages; } /*********************************************************************//** @@ -3288,7 +3257,7 @@ bool allow = innobase_query_caching_table_check_low(table, trx); - dict_table_close(table); + table->release(); if (allow) { /* If the isolation level is high, assign a read view for the @@ -3678,7 +3647,7 @@ m_prebuilt->used_in_HANDLER = TRUE; reset_template(); - m_prebuilt->trx->bulk_insert = false; + m_prebuilt->trx->bulk_insert &= TRX_DDL_BULK; } /*********************************************************************//** @@ -3701,53 +3670,44 @@ DBUG_RETURN(1); } -/** Return the minimum buffer pool size based on page size */ -static inline ulint min_buffer_pool_size() +static void innodb_buffer_pool_size_update(THD* thd,st_mysql_sys_var*,void*, + const void *save) noexcept { - ulint s= (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size; - /* buf_pool_chunk_size minimum is 1M, so round up to a multiple */ - ulint alignment= 1U << 20; - return UT_CALC_ALIGN(s, alignment); + buf_pool.resize(*static_cast(save), thd); } -/** Validate the requested buffer pool size. Also, reserve the necessary -memory needed for buffer pool resize. -@param[in] thd thread handle -@param[in] var pointer to system variable -@param[out] save immediate result for update function -@param[in] value incoming string -@return 0 on success, 1 on failure. -*/ -static -int -innodb_buffer_pool_size_validate( - THD* thd, - struct st_mysql_sys_var* var, - void* save, - struct st_mysql_value* value); - -/** Update the system variable innodb_buffer_pool_size using the "saved" -value. This function is registered as a callback with MySQL. 
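
The removed min_buffer_pool_size() above, like the min_val computation in innodb_init_params() further down, sizes the floor so the buffer pool can hold BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN/4 pages, rounded up to a 1 MiB multiple. A worked example; BUF_LRU_MIN_LEN = 256 is assumed, and one page per buffer pool block approximates blocks_in_bytes():

    #include <cstdio>

    int main()
    {
      const unsigned long BUF_LRU_MIN_LEN = 256;        // assumed value
      const unsigned long srv_page_size = 16384;        // innodb_page_size=16k
      unsigned long s = (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size;
      const unsigned long alignment = 1UL << 20;        // UT_CALC_ALIGN to 1 MiB
      unsigned long min_size = (s + alignment - 1) & ~(alignment - 1);
      std::printf("minimum innodb_buffer_pool_size = %lu MiB\n", min_size >> 20);
      // 320 pages * 16 KiB = 5 MiB exactly, so the floor is 5 MiB
    }
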
-@param[in] thd thread handle -@param[in] var pointer to system variable -@param[out] var_ptr where the formal string goes -@param[in] save immediate result from check function */ -static -void -innodb_buffer_pool_size_update( - THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save); +static MYSQL_SYSVAR_SIZE_T(buffer_pool_size, buf_pool.size_in_bytes_requested, + PLUGIN_VAR_RQCMDARG, + "The size of the memory buffer InnoDB uses to cache data" + " and indexes of its tables.", + nullptr, innodb_buffer_pool_size_update, 128U << 20, 2U << 20, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), 1U << 20); + +#if defined __linux__ || !defined DBUG_OFF +static void innodb_buffer_pool_size_auto_min_update(THD*,st_mysql_sys_var*, + void*, const void *save) + noexcept +{ + mysql_mutex_lock(&buf_pool.mutex); + buf_pool.size_in_bytes_auto_min= *static_cast(save); + mysql_mutex_unlock(&buf_pool.mutex); +} -static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size, +static MYSQL_SYSVAR_SIZE_T(buffer_pool_size_auto_min, + buf_pool.size_in_bytes_auto_min, PLUGIN_VAR_RQCMDARG, - "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - innodb_buffer_pool_size_validate, - innodb_buffer_pool_size_update, - 128ULL << 20, - 2ULL << 20, - LLONG_MAX, 1024*1024L); + "Minimum innodb_buffer_pool_size for dynamic shrinking on memory pressure", + nullptr, innodb_buffer_pool_size_auto_min_update, 0, 0, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), + innodb_buffer_pool_extent_size); +#endif + +static MYSQL_SYSVAR_SIZE_T(buffer_pool_size_max, buf_pool.size_in_bytes_max, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Maximum innodb_buffer_pool_size", + nullptr, nullptr, 0, 0, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), + innodb_buffer_pool_extent_size); static MYSQL_SYSVAR_UINT(log_write_ahead_size, log_sys.write_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -3799,29 +3759,6 @@ return 0; } -/** Initialize and normalize innodb_buffer_pool_{chunk_,}size. */ -static void innodb_buffer_pool_size_init() -{ - if (srv_buf_pool_chunk_unit > srv_buf_pool_size) - { - /* Size unit of buffer pool is larger than srv_buf_pool_size. - adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */ - srv_buf_pool_chunk_unit = srv_buf_pool_size; - } - else if (srv_buf_pool_chunk_unit == 0) - { - srv_buf_pool_chunk_unit = srv_buf_pool_size / 64; - my_large_page_truncate(&srv_buf_pool_chunk_unit); - } - - if (srv_buf_pool_chunk_unit < buf_pool_chunk_min_size) - srv_buf_pool_chunk_unit = buf_pool_chunk_min_size; - - srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size); - innobase_buffer_pool_size = srv_buf_pool_size; -} - - static bool compression_algorithm_is_not_loaded(ulong compression_algorithm, myf flags) { @@ -3847,323 +3784,298 @@ @retval HA_ERR_INITIALIZATION when some parameters are out of range */ static int innodb_init_params() { - DBUG_ENTER("innodb_init_params"); + DBUG_ENTER("innodb_init_params"); - ulong num_pll_degree; + srv_page_size_shift= innodb_page_size_validate(srv_page_size); + if (!srv_page_size_shift) + { + sql_print_error("InnoDB: Invalid page size=%lu.\n", srv_page_size); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - /* Check that values don't overflow on 32-bit systems. 
*/ - if (sizeof(ulint) == 4) { - if (innobase_buffer_pool_size > UINT_MAX32) { - sql_print_error( - "innodb_buffer_pool_size can't be over 4GB" - " on 32-bit systems"); - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - } - } + size_t &min= MYSQL_SYSVAR_NAME(buffer_pool_size).min_val; + min= ut_calc_align + (buf_pool.blocks_in_bytes(BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4), + 1U << 20); + size_t innodb_buffer_pool_size= buf_pool.size_in_bytes_requested; + + /* With large pages, buffer pool can't grow or shrink. */ + if (!buf_pool.size_in_bytes_max || my_use_large_pages || + innodb_buffer_pool_size > buf_pool.size_in_bytes_max) + buf_pool.size_in_bytes_max= ut_calc_align(innodb_buffer_pool_size, + innodb_buffer_pool_extent_size); + + MYSQL_SYSVAR_NAME(buffer_pool_size).max_val= buf_pool.size_in_bytes_max; +#if defined __linux__ || !defined DBUG_OFF + if (!buf_pool.size_in_bytes_auto_min || + buf_pool.size_in_bytes_auto_min > buf_pool.size_in_bytes_max) + buf_pool.size_in_bytes_auto_min= buf_pool.size_in_bytes_max; + MYSQL_SYSVAR_NAME(buffer_pool_size_auto_min).max_val= + buf_pool.size_in_bytes_max; +#endif - /* The buffer pool needs to be able to accommodate enough many - pages, even for larger pages */ - MYSQL_SYSVAR_NAME(buffer_pool_size).min_val= min_buffer_pool_size(); - - if (innobase_buffer_pool_size < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) { - ib::error() << "innodb_page_size=" - << srv_page_size << " requires " - << "innodb_buffer_pool_size >= " - << (MYSQL_SYSVAR_NAME(buffer_pool_size).min_val >> 20) - << "MiB current " << (innobase_buffer_pool_size >> 20) - << "MiB"; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - if (!ut_is_2pow(log_sys.write_size)) { - sql_print_error("InnoDB: innodb_log_write_ahead_size=%u" - " is not a power of two", - log_sys.write_size); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - if (compression_algorithm_is_not_loaded(innodb_compression_algorithm, ME_ERROR_LOG)) - DBUG_RETURN(HA_ERR_INITIALIZATION); - - if ((srv_encrypt_tables || srv_encrypt_log - || innodb_encrypt_temporary_tables) - && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { - sql_print_error("InnoDB: cannot enable encryption, " - "encryption plugin is not available"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (innodb_buffer_pool_size < min) + { + sql_print_error("InnoDB: innodb_page_size=%lu requires " + "innodb_buffer_pool_size >= %zu MiB current %zu MiB", + srv_page_size, min >> 20, innodb_buffer_pool_size >> 20); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + + if (!ut_is_2pow(log_sys.write_size)) + { + sql_print_error("InnoDB: innodb_log_write_ahead_size=%u" + " is not a power of two", + log_sys.write_size); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + + if (compression_algorithm_is_not_loaded(innodb_compression_algorithm, ME_ERROR_LOG)) + DBUG_RETURN(HA_ERR_INITIALIZATION); + + if ((srv_encrypt_tables || srv_encrypt_log || + innodb_encrypt_temporary_tables) && + !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) + { + sql_print_error("InnoDB: cannot enable encryption, " + "encryption plugin is not available"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } #ifdef _WIN32 - if (!is_filename_allowed(srv_buf_dump_filename, - strlen(srv_buf_dump_filename), FALSE)) { - sql_print_error("InnoDB: innodb_buffer_pool_filename" - " cannot have colon (:) in the file name."); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (!is_filename_allowed(srv_buf_dump_filename, + strlen(srv_buf_dump_filename), false)) + { + sql_print_error("InnoDB: innodb_buffer_pool_filename" + " cannot have colon (:) in the file 
name."); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } #endif - /* First calculate the default path for innodb_data_home_dir etc., - in case the user has not given any value. + /* First calculate the default path for innodb_data_home_dir etc., + in case the user has not given any value. - Note that when using the embedded server, the datadirectory is not - necessarily the current directory of this program. */ + Note that when using the embedded server, the datadirectory is not + necessarily the current directory of this program. */ - fil_path_to_mysql_datadir = + fil_path_to_mysql_datadir = #ifndef HAVE_REPLICATION - mysqld_embedded ? mysql_real_data_home : + mysqld_embedded ? mysql_real_data_home : #endif - "./"; + "./"; - /* Set InnoDB initialization parameters according to the values - read from MySQL .cnf file */ + /* Set InnoDB initialization parameters according to the values + read from MySQL .cnf file */ - /* The default dir for data files is the datadir of MySQL */ + /* The default dir for data files is the datadir of MySQL */ - srv_data_home = innobase_data_home_dir - ? innobase_data_home_dir - : const_cast(fil_path_to_mysql_datadir); + srv_data_home= innobase_data_home_dir + ? innobase_data_home_dir + : const_cast(fil_path_to_mysql_datadir); #ifdef WITH_WSREP - /* If we use the wsrep API, then we need to tell the server - the path to the data files (for passing it to the SST scripts): */ - wsrep_set_data_home_dir(srv_data_home); + /* If we use the wsrep API, then we need to tell the server + the path to the data files (for passing it to the SST scripts): */ + wsrep_set_data_home_dir(srv_data_home); #endif /* WITH_WSREP */ - /*--------------- Shared tablespaces -------------------------*/ - - /* Check that the value of system variable innodb_page_size was - set correctly. Its value was put into srv_page_size. If valid, - return the associated srv_page_size_shift. */ - srv_page_size_shift = innodb_page_size_validate(srv_page_size); - if (!srv_page_size_shift) { - sql_print_error("InnoDB: Invalid page size=%lu.\n", - srv_page_size); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - srv_sys_space.set_space_id(TRX_SYS_SPACE); - - switch (srv_checksum_algorithm) { - case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: - srv_sys_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER - | FSP_FLAGS_FCRC32_PAGE_SSIZE()); - break; - default: - srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE()); - } - - srv_sys_space.set_path(srv_data_home); - - /* Supports raw devices */ - if (!srv_sys_space.parse_params(innobase_data_file_path, true)) { - ib::error() << "Unable to parse innodb_data_file_path=" - << innobase_data_file_path; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - srv_tmp_space.set_path(srv_data_home); - - /* Temporary tablespace is in full crc32 format. 
*/ - srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER - | FSP_FLAGS_FCRC32_PAGE_SSIZE()); - - if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) { - ib::error() << "Unable to parse innodb_temp_data_file_path=" - << innobase_temp_data_file_path; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - /* Perform all sanity check before we take action of deleting files*/ - if (srv_sys_space.intersection(&srv_tmp_space)) { - sql_print_error("innodb_temporary and innodb_system" - " file names seem to be the same."); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - srv_sys_space.normalize_size(); - srv_tmp_space.normalize_size(); - - /* ------------ UNDO tablespaces files ---------------------*/ - if (!srv_undo_dir) { - srv_undo_dir = const_cast(fil_path_to_mysql_datadir); - } - - if (strchr(srv_undo_dir, ';')) { - sql_print_error("syntax error in innodb_undo_directory"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + /*--------------- Shared tablespaces -------------------------*/ - /* -------------- All log files ---------------------------*/ - - /* The default dir for log files is the datadir of MySQL */ + /* Check that the value of system variable innodb_page_size was + set correctly. Its value was put into srv_page_size. If valid, + return the associated srv_page_size_shift. */ + + srv_sys_space.set_space_id(TRX_SYS_SPACE); + /* Temporary tablespace is in full crc32 format. */ + srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER | + FSP_FLAGS_FCRC32_PAGE_SSIZE()); + + switch (srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: + srv_sys_space.set_flags(srv_tmp_space.flags()); + break; + default: + srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE()); + } - if (!srv_log_group_home_dir) { - srv_log_group_home_dir - = const_cast(fil_path_to_mysql_datadir); - } + srv_sys_space.set_path(srv_data_home); - if (strchr(srv_log_group_home_dir, ';')) { - sql_print_error("syntax error in innodb_log_group_home_dir"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL); + if (!srv_sys_space.parse_params(innobase_data_file_path, true)) + { + sql_print_error("InnoDB: Unable to parse innodb_data_file_path=%s", + innobase_data_file_path); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - /* Check that interdependent parameters have sane values. */ - if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) { - sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm" - " cannot be set higher than" - " innodb_max_dirty_pages_pct.\n" - "InnoDB: Setting" - " innodb_max_dirty_pages_pct_lwm to %lf\n", - srv_max_buf_pool_modified_pct); + srv_tmp_space.set_path(srv_data_home); - srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct; - } + if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) + { + sql_print_error("InnoDB: Unable to parse innodb_temp_data_file_path=%s", + innobase_temp_data_file_path); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) { + /* Perform all sanity check before we take action of deleting files*/ + if (srv_sys_space.intersection(&srv_tmp_space)) + { + sql_print_error("innodb_temporary and innodb_system" + " file names seem to be the same."); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) { - /* Avoid overflow. */ - srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT; - } else { - /* The user has not set the value. 
We should - set it based on innodb_io_capacity. */ - srv_max_io_capacity = - ut_max(2 * srv_io_capacity, 2000UL); - } + srv_sys_space.normalize_size(); + srv_tmp_space.normalize_size(); - } else if (srv_max_io_capacity < srv_io_capacity) { - sql_print_warning("InnoDB: innodb_io_capacity" - " cannot be set higher than" - " innodb_io_capacity_max." - "Setting innodb_io_capacity=%lu", - srv_max_io_capacity); + /* ------------ UNDO tablespaces files ---------------------*/ + if (!srv_undo_dir) + srv_undo_dir= const_cast(fil_path_to_mysql_datadir); - srv_io_capacity = srv_max_io_capacity; - } + if (strchr(srv_undo_dir, ';')) + { + sql_print_error("syntax error in innodb_undo_directory"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if (UNIV_PAGE_SIZE_DEF != srv_page_size) { - ib::info() << "innodb_page_size=" << srv_page_size; + if (!srv_log_group_home_dir) + srv_log_group_home_dir= const_cast(fil_path_to_mysql_datadir); - srv_max_undo_log_size = std::max( - srv_max_undo_log_size, - ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) - << srv_page_size_shift); - } + if (strchr(srv_log_group_home_dir, ';')) + { + sql_print_error("syntax error in innodb_log_group_home_dir"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - srv_buf_pool_size = ulint(innobase_buffer_pool_size); + DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL); - if (innobase_open_files < 10) { - innobase_open_files = 300; - if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) { - innobase_open_files = tc_size; - } - } + /* Check that interdependent parameters have sane values. */ + if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) + { + sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm" + " cannot be set higher than" + " innodb_max_dirty_pages_pct.\n" + "InnoDB: Setting" + " innodb_max_dirty_pages_pct_lwm to %lf\n", + srv_max_buf_pool_modified_pct); + srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct; + } - if (innobase_open_files > open_files_limit) { - ib::warn() << "innodb_open_files " << innobase_open_files - << " should not be greater" - << " than the open_files_limit " << open_files_limit; - if (innobase_open_files > tc_size) { - innobase_open_files = tc_size; - } - } + if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) + { + if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) + /* Avoid overflow. */ + srv_max_io_capacity= SRV_MAX_IO_CAPACITY_LIMIT; + else + /* The user has not set the value. We should set it based on + innodb_io_capacity. */ + srv_max_io_capacity= std::max(2 * srv_io_capacity, 2000UL); + } + else if (srv_max_io_capacity < srv_io_capacity) + { + sql_print_warning("InnoDB: innodb_io_capacity cannot be set higher than" + " innodb_io_capacity_max." 
+ "Setting innodb_io_capacity=%lu", srv_max_io_capacity); + srv_io_capacity= srv_max_io_capacity; + } - ulint min_open_files_limit = srv_undo_tablespaces - + srv_sys_space.m_files.size() - + srv_tmp_space.m_files.size() + 1; - if (min_open_files_limit > innobase_open_files) { - sql_print_warning( - "InnoDB: innodb_open_files=%lu is not greater " - "than the number of system tablespace files, " - "temporary tablespace files, " - "innodb_undo_tablespaces=%u; adjusting " - "to innodb_open_files=%zu", - innobase_open_files, srv_undo_tablespaces, - min_open_files_limit); - innobase_open_files = (ulong) min_open_files_limit; - } + if (UNIV_PAGE_SIZE_DEF != srv_page_size) + { + sql_print_information("InnoDB: innodb_page_size=%lu", srv_page_size); + srv_max_undo_log_size= + std::max(srv_max_undo_log_size, + ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) << + srv_page_size_shift); + } - srv_max_n_open_files = innobase_open_files; - srv_innodb_status = (ibool) innobase_create_status_file; + if (innobase_open_files < 10) + innobase_open_files= (srv_file_per_table && tc_size > 300 && + tc_size < open_files_limit) + ? tc_size + : 300; - srv_print_verbose_log = mysqld_embedded ? 0 : 1; + if (innobase_open_files > open_files_limit) + { + sql_print_warning("InnoDB: innodb_open_files %lu" + " should not be greater than the open_files_limit %lu", + innobase_open_files, open_files_limit); + if (innobase_open_files > tc_size) + innobase_open_files= tc_size; + } - /* Round up fts_sort_pll_degree to nearest power of 2 number */ - for (num_pll_degree = 1; - num_pll_degree < fts_sort_pll_degree; - num_pll_degree <<= 1) { + const size_t min_open_files_limit= srv_undo_tablespaces + + srv_sys_space.m_files.size() + srv_tmp_space.m_files.size() + 1; + if (min_open_files_limit > innobase_open_files) + { + sql_print_warning("InnoDB: innodb_open_files=%lu is not greater " + "than the number of system tablespace files, " + "temporary tablespace files, " + "innodb_undo_tablespaces=%lu; adjusting " + "to innodb_open_files=%zu", + innobase_open_files, srv_undo_tablespaces, + min_open_files_limit); + innobase_open_files= ulong(min_open_files_limit); + } - /* No op */ - } + srv_max_n_open_files= innobase_open_files; + srv_innodb_status = (ibool) innobase_create_status_file; - fts_sort_pll_degree = num_pll_degree; + srv_print_verbose_log= !mysqld_embedded; - /* Store the default charset-collation number of this MySQL - installation */ + if (!ut_is_2pow(fts_sort_pll_degree)) + { + ulong n; + for (n= 1; n < fts_sort_pll_degree; n<<= 1) {} + fts_sort_pll_degree= n; + } - data_mysql_default_charset_coll = (ulint) default_charset_info->number; + /* Store the default charset-collation number of this installation */ + data_mysql_default_charset_coll = (ulint) default_charset_info->number; #if !defined _WIN32 && defined O_DIRECT - if (srv_use_atomic_writes && my_may_have_atomic_write) { - /* - Force O_DIRECT on Unixes (on Windows writes are always - unbuffered) - */ - switch (srv_file_flush_method) { - case SRV_O_DIRECT: - case SRV_O_DIRECT_NO_FSYNC: - break; - default: - srv_file_flush_method = SRV_O_DIRECT; - fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n"); - } - } + if (srv_use_atomic_writes && my_may_have_atomic_write) + { + /* Force O_DIRECT on Unixes (on Windows writes are always unbuffered) */ + switch (srv_file_flush_method) { + case SRV_O_DIRECT: + case SRV_O_DIRECT_NO_FSYNC: + break; + default: + srv_file_flush_method= SRV_O_DIRECT; + fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n"); + } + } 
#endif #if defined __linux__ || defined _WIN32 - if (srv_flush_log_at_trx_commit == 2) { - /* Do not disable the file system cache if - innodb_flush_log_at_trx_commit=2. */ - log_sys.log_buffered = true; - } + if (srv_flush_log_at_trx_commit == 2) + /* Do not disable the file system cache if + innodb_flush_log_at_trx_commit=2. */ + log_sys.log_buffered= true; #endif #if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32 - /* Currently native AIO is supported only on windows and linux - and that also when the support is compiled in. In all other - cases, we ignore the setting of innodb_use_native_aio. */ - srv_use_native_aio = FALSE; -#endif -#ifdef HAVE_URING - if (srv_use_native_aio && io_uring_may_be_unsafe) { - sql_print_warning("innodb_use_native_aio may cause " - "hangs with this kernel %s; see " - "https://jira.mariadb.org/browse/MDEV-26674", - io_uring_may_be_unsafe); - } + /* Currently native AIO is supported only on windows and linux + and that also when the support is compiled in. In all other + cases, we ignore the setting of innodb_use_native_aio. */ + srv_use_native_aio= FALSE; #endif #ifdef _WIN32 - switch (srv_file_flush_method) { - case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */: - srv_file_flush_method = SRV_ALL_O_DIRECT_FSYNC; - break; - case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */: - srv_file_flush_method = SRV_FSYNC; - break; - default: - ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC); - } + switch (srv_file_flush_method) { + case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */: + srv_file_flush_method= SRV_ALL_O_DIRECT_FSYNC; + break; + case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */: + srv_file_flush_method= SRV_FSYNC; + break; + default: + ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC); + } #else - ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC); + ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC); #endif - innodb_buffer_pool_size_init(); - - srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift); - DBUG_RETURN(0); + DBUG_RETURN(0); } /** Initialize the InnoDB storage engine plugin. @@ -4576,7 +4488,7 @@ undo_no_t savept= 0; trx->rollback(&savept); /* MariaDB will roll back the entire transaction. */ - trx->bulk_insert= false; + trx->bulk_insert&= TRX_DDL_BULK; trx->last_stmt_start= 0; return true; } @@ -4620,10 +4532,9 @@ ut_ad("invalid state" == 0); /* fall through */ case TRX_STATE_PREPARED: - ut_ad(commit_trx || trx->is_wsrep()); - ut_ad(thd_test_options(thd, OPTION_NOT_AUTOCOMMIT - | OPTION_BEGIN) - || trx->is_wsrep()); + ut_ad(commit_trx || + !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT + | OPTION_BEGIN)); /* fall through */ case TRX_STATE_ACTIVE: /* Transaction is deregistered only in a commit or a @@ -4825,11 +4736,13 @@ We put the request in a queue, so that we can notify upper layer about checkpoint complete when we have flushed the redo log. If we have already flushed all relevant redo log, we notify immediately.*/ -static void innodb_log_flush_request(void *cookie) +static void innodb_log_flush_request(void *cookie) noexcept { + log_sys.latch.wr_lock(SRW_LOCK_CALL); lsn_t flush_lsn= log_sys.get_flushed_lsn(); /* Load lsn relaxed after flush_lsn was loaded from the same cache line */ const lsn_t lsn= log_sys.get_lsn(); + log_sys.latch.wr_unlock(); if (flush_lsn >= lsn) /* All log is already persistent. 
*/; @@ -5837,6 +5750,70 @@ table->autoinc_mutex.wr_unlock(); } +dberr_t ha_innobase::statistics_init(dict_table_t *table, bool recalc) +{ + ut_ad(table->is_readable()); + ut_ad(!table->stats_mutex_is_owner()); + + uint32_t stat= table->stat; + dberr_t err= DB_SUCCESS; + + if (!recalc && dict_table_t::stat_initialized(stat)); + else if (srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) + dict_stats_empty_table(table, false); + else + { + if (dict_table_t::stats_is_persistent(stat) && !srv_read_only_mode +#ifdef WITH_WSREP + && !wsrep_thd_skip_locking(m_user_thd) +#endif + ) + { + switch (dict_stats_persistent_storage_check(false)) { + case SCHEMA_OK: + if (recalc) + { + recalc: + err= dict_stats_update_persistent(table); + if (err == DB_SUCCESS) + err= dict_stats_save(table); + } + else + { + err= dict_stats_fetch_from_ps(table); + if (err == DB_STATS_DO_NOT_EXIST && table->stats_is_auto_recalc()) + goto recalc; + } + if (err == DB_SUCCESS || err == DB_READ_ONLY) + return err; + if (!recalc) + break; + /* fall through */ + case SCHEMA_INVALID: + if (table->stats_error_printed) + break; + table->stats_error_printed = true; + if (opt_bootstrap) + break; + sql_print_warning("InnoDB: %s of persistent statistics requested" + " for table %`.*s.%`s" + " but the required persistent statistics storage" + " is corrupted.", + recalc ? "Recalculation" : "Fetch", + int(table->name.dblen()), table->name.m_name, + table->name.basename()); + /* fall through */ + case SCHEMA_NOT_EXIST: + err= DB_STATS_DO_NOT_EXIST; + } + } + + dict_stats_update_transient(table); + } + + return err; +} + /** Open an InnoDB table @param[in] name table name @return error code @@ -7958,6 +7935,17 @@ error, m_prebuilt->table->flags, m_user_thd); #ifdef WITH_WSREP +#ifdef ENABLED_DEBUG_SYNC + DBUG_EXECUTE_IF("sync.wsrep_after_write_row", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_after_write_row_reached " + "WAIT_FOR signal.wsrep_after_write_row"; + DBUG_ASSERT(!debug_sync_set_action(m_user_thd, STRING_WITH_LEN(act))); + };); +#endif /* ENABLED_DEBUG_SYNC */ + if (!error_result && trx->is_wsrep() && !trx->is_bulk_insert() && wsrep_thd_is_local(m_user_thd) @@ -13338,7 +13326,7 @@ if (!error) { - dict_stats_update(info.table(), DICT_STATS_EMPTY_TABLE); + dict_stats_empty_table_and_save(info.table()); if (!info.table()->is_temporary()) log_write_up_to(trx->commit_lsn, true); info.table()->release(); @@ -13387,6 +13375,8 @@ DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE); } + ut_ad(m_prebuilt->table->stat_initialized()); + if (m_prebuilt->table->space == fil_system.sys_space) { ib_senderrf( m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, @@ -13460,23 +13450,17 @@ err, m_prebuilt->table->flags, NULL)); } - if (dict_stats_is_persistent_enabled(m_prebuilt->table)) { - dberr_t ret; - - /* Adjust the persistent statistics. 
*/ - ret = dict_stats_update(m_prebuilt->table, - DICT_STATS_RECALC_PERSISTENT); + dict_table_t* t = m_prebuilt->table; - if (ret != DB_SUCCESS) { - push_warning_printf( - ha_thd(), - Sql_condition::WARN_LEVEL_WARN, - ER_ALTER_INFO, - "Error updating stats for table '%s'" - " after table rebuild: %s", - m_prebuilt->table->name.m_name, - ut_strerr(ret)); - } + if (dberr_t ret = dict_stats_update_persistent_try(t)) { + push_warning_printf( + ha_thd(), + Sql_condition::WARN_LEVEL_WARN, + ER_ALTER_INFO, + "Error updating stats after" + " ALTER TABLE %`.*s.%`s IMPORT TABLESPACE: %s", + int(t->name.dblen()), t->name.m_name, + t->name.basename(), ut_strerr(ret)); } DBUG_RETURN(0); @@ -13619,8 +13603,6 @@ err= lock_table_children(table, trx); } - dict_table_t *table_stats= nullptr, *index_stats= nullptr; - MDL_ticket *mdl_table= nullptr, *mdl_index= nullptr; if (err == DB_SUCCESS) err= lock_table_for_trx(table, trx, LOCK_X); @@ -13645,7 +13627,7 @@ /* This looks like the rollback of ALTER TABLE...ADD PARTITION that was caused by MDL timeout. We could have written undo log for inserting the data into the new partitions. */ - if (table->stat_persistent != DICT_STATS_PERSISTENT_OFF) + if (!(table->stat & dict_table_t::STATS_PERSISTENT_OFF)) { /* We do not really know if we are holding MDL_EXCLUSIVE. Even though this code is handling the case that we are not holding @@ -13659,37 +13641,18 @@ #endif DEBUG_SYNC(thd, "before_delete_table_stats"); + dict_stats stats; + bool stats_failed= true; - if (err == DB_SUCCESS && dict_stats_is_persistent_enabled(table) && + if (err == DB_SUCCESS && table->stats_is_persistent() && !table->is_stats_table()) { - table_stats= dict_table_open_on_name(TABLE_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (table_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats= dict_acquire_mdl_shared(table_stats, - thd, &mdl_table); - dict_sys.unfreeze(); - } - - index_stats= dict_table_open_on_name(INDEX_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (index_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats= dict_acquire_mdl_shared(index_stats, - thd, &mdl_index); - dict_sys.unfreeze(); - } - + stats_failed= stats.open(thd); const bool skip_wait{table->name.is_temporary()}; - if (table_stats && index_stats && - !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) && - !strcmp(index_stats->name.m_name, INDEX_STATS_NAME) && - !(err= lock_table_for_trx(table_stats, trx, LOCK_X, skip_wait))) - err= lock_table_for_trx(index_stats, trx, LOCK_X, skip_wait); + if (!stats_failed && + !(err= lock_table_for_trx(stats.table(), trx, LOCK_X, skip_wait))) + err= lock_table_for_trx(stats.index(), trx, LOCK_X, skip_wait); if (err != DB_SUCCESS && skip_wait) { @@ -13698,10 +13661,8 @@ ut_ad(err == DB_LOCK_WAIT); ut_ad(trx->error_state == DB_SUCCESS); err= DB_SUCCESS; - dict_table_close(table_stats, false, thd, mdl_table); - dict_table_close(index_stats, false, thd, mdl_index); - table_stats= nullptr; - index_stats= nullptr; + stats.close(); + stats_failed= true; } } @@ -13772,13 +13733,11 @@ else if (rollback_add_partition) purge_sys.resume_FTS(); #endif - if (table_stats) - dict_table_close(table_stats, true, thd, mdl_table); - if (index_stats) - dict_table_close(index_stats, true, thd, mdl_index); row_mysql_unlock_data_dictionary(trx); if (trx != parent_trx) trx->free(); + if (!stats_failed) + stats.close(); DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL)); } @@ -13793,7 +13752,7 @@ err= trx->drop_table_foreign(table->name); } - if (err == DB_SUCCESS && table_stats && 
index_stats) + if (err == DB_SUCCESS && !stats_failed) err= trx->drop_table_statistics(table->name); if (err != DB_SUCCESS) goto err_exit; @@ -13804,11 +13763,9 @@ std::vector deleted; trx->commit(deleted); - if (table_stats) - dict_table_close(table_stats, true, thd, mdl_table); - if (index_stats) - dict_table_close(index_stats, true, thd, mdl_index); row_mysql_unlock_data_dictionary(trx); + if (!stats_failed) + stats.close(); for (pfs_os_file_t d : deleted) os_file_close(d); log_write_up_to(trx->commit_lsn, true); @@ -14004,9 +13961,6 @@ ib_table->name.m_name, ib_table->id); const char *name= mem_heap_strdup(heap, ib_table->name.m_name); - dict_table_t *table_stats = nullptr, *index_stats = nullptr; - MDL_ticket *mdl_table = nullptr, *mdl_index = nullptr; - dberr_t error= lock_table_children(ib_table, trx); if (error == DB_SUCCESS) @@ -14014,6 +13968,7 @@ const bool fts= error == DB_SUCCESS && ib_table->flags2 & (DICT_TF2_FTS_HAS_DOC_ID | DICT_TF2_FTS); + const bool pause_purge= error == DB_SUCCESS && ib_table->get_ref_count() > 1; if (fts) { @@ -14021,45 +13976,33 @@ purge_sys.stop_FTS(*ib_table); error= fts_lock_tables(trx, *ib_table); } + else if (pause_purge) + purge_sys.stop_FTS(); - /* Wait for purge threads to stop using the table. */ - for (uint n = 15; ib_table->get_ref_count() > 1; ) + if (error == DB_SUCCESS) { - if (!--n) + /* Wait for purge threads to stop using the table. */ + for (uint n = 15; ib_table->get_ref_count() > 1; ) { - error= DB_LOCK_WAIT_TIMEOUT; - break; + if (!--n) + { + error= DB_LOCK_WAIT_TIMEOUT; + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); } - std::this_thread::sleep_for(std::chrono::milliseconds(50)); } - if (error == DB_SUCCESS && dict_stats_is_persistent_enabled(ib_table) && + dict_stats stats; + bool stats_failed= true; + + if (error == DB_SUCCESS && ib_table->stats_is_persistent() && !ib_table->is_stats_table()) { - table_stats= dict_table_open_on_name(TABLE_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (table_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats= dict_acquire_mdl_shared(table_stats, m_user_thd, - &mdl_table); - dict_sys.unfreeze(); - } - index_stats= dict_table_open_on_name(INDEX_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (index_stats) - { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats= dict_acquire_mdl_shared(index_stats, m_user_thd, - &mdl_index); - dict_sys.unfreeze(); - } - - if (table_stats && index_stats && - !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) && - !strcmp(index_stats->name.m_name, INDEX_STATS_NAME) && - !(error= lock_table_for_trx(table_stats, trx, LOCK_X))) - error= lock_table_for_trx(index_stats, trx, LOCK_X); + stats_failed= stats.open(m_user_thd); + if (!stats_failed && + !(error= lock_table_for_trx(stats.table(), trx, LOCK_X))) + error= lock_table_for_trx(stats.index(), trx, LOCK_X); } if (error == DB_SUCCESS) @@ -14123,7 +14066,7 @@ if (!err) { - dict_stats_update(m_prebuilt->table, DICT_STATS_EMPTY_TABLE); + dict_stats_empty_table_and_save(m_prebuilt->table); log_write_up_to(trx->commit_lsn, true); row_prebuilt_t *prebuilt= m_prebuilt; uchar *upd_buf= m_upd_buf; @@ -14151,15 +14094,46 @@ } trx->free(); - + if (!stats_failed) + stats.close(); mem_heap_free(heap); + DBUG_RETURN(err); +} - if (table_stats) - dict_table_close(table_stats, false, m_user_thd, mdl_table); - if (index_stats) - dict_table_close(index_stats, false, m_user_thd, mdl_index); +/** Deinitialize InnoDB persistent statistics, forcing them +to be reloaded on subsequent ha_innobase::open(). 
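The stats_deinit() helper introduced here clears the STATS_INITIALIZED bit and then marks every cached statistics field undefined (see the MEM_UNDEFINED calls just below), so instrumented builds report any reader that consumes evicted statistics before ha_innobase::open() reloads them. A minimal sketch of the same poisoning pattern, assuming MEM_UNDEFINED behaves like Valgrind's VALGRIND_MAKE_MEM_UNDEFINED (a no-op without instrumentation); the struct is hypothetical:

  #include <valgrind/memcheck.h>

  struct cached_stats
  {
    bool initialized;
    unsigned long long n_rows;
  };

  // Evict a cache entry: drop the validity flag first, then poison
  // the payload so stale reads are flagged by memcheck.
  static void evict(cached_stats &c)
  {
    c.initialized = false;
    VALGRIND_MAKE_MEM_UNDEFINED(&c.n_rows, sizeof c.n_rows);
  }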
+@param t table for which the cached STATS_PERSISTENT are to be evicted */ +static void stats_deinit(dict_table_t *t) noexcept +{ + ut_ad(dict_sys.frozen()); + ut_ad(t->get_ref_count() == 0); - DBUG_RETURN(err); + if (t->is_temporary() || t->no_rollback()) + return; + + t->stats_mutex_lock(); + t->stat= t->stat & ~dict_table_t::STATS_INITIALIZED; + MEM_UNDEFINED(&t->stat_n_rows, sizeof t->stat_n_rows); + MEM_UNDEFINED(&t->stat_clustered_index_size, + sizeof t->stat_clustered_index_size); + MEM_UNDEFINED(&t->stat_sum_of_other_index_sizes, + sizeof t->stat_sum_of_other_index_sizes); + MEM_UNDEFINED(&t->stat_modified_counter, sizeof t->stat_modified_counter); +#ifdef HAVE_valgrind + for (dict_index_t *i= dict_table_get_first_index(t); i; + i= dict_table_get_next_index(i)) + { + MEM_UNDEFINED(i->stat_n_diff_key_vals, + i->n_uniq * sizeof *i->stat_n_diff_key_vals); + MEM_UNDEFINED(i->stat_n_sample_sizes, + i->n_uniq * sizeof *i->stat_n_sample_sizes); + MEM_UNDEFINED(i->stat_n_non_null_key_vals, + i->n_uniq * sizeof *i->stat_n_non_null_key_vals); + MEM_UNDEFINED(&i->stat_index_size, sizeof i->stat_index_size); + MEM_UNDEFINED(&i->stat_n_leaf_pages, sizeof i->stat_n_leaf_pages); + } +#endif /* HAVE_valgrind */ + t->stats_mutex_unlock(); } /*********************************************************************//** @@ -14184,8 +14158,6 @@ trx_t* trx = innobase_trx_allocate(thd); trx_start_for_ddl(trx); - dict_table_t *table_stats = nullptr, *index_stats = nullptr; - MDL_ticket *mdl_table = nullptr, *mdl_index = nullptr; char norm_from[MAX_FULL_NAME_LEN]; char norm_to[MAX_FULL_NAME_LEN]; @@ -14195,45 +14167,49 @@ dberr_t error = DB_SUCCESS; const bool from_temp = dict_table_t::is_temporary_name(norm_from); + dict_table_t* t; + bool pause_purge = false, fts_exist = false; + if (from_temp) { /* There is no need to lock any FOREIGN KEY child tables. 
*/ - } else if (dict_table_t *table = dict_table_open_on_name( - norm_from, false, DICT_ERR_IGNORE_FK_NOKEY)) { - error = lock_table_children(table, trx); - if (error == DB_SUCCESS) { - error = lock_table_for_trx(table, trx, LOCK_X); + t = nullptr; + } else { + t = dict_table_open_on_name( + norm_from, false, DICT_ERR_IGNORE_FK_NOKEY); + if (t) { + error = lock_table_children(t, trx); + if (error == DB_SUCCESS) { + error = lock_table_for_trx(t, trx, LOCK_X); + } + fts_exist = error == DB_SUCCESS && t->flags2 + & (DICT_TF2_FTS_HAS_DOC_ID | DICT_TF2_FTS); + pause_purge = error == DB_SUCCESS + && t->get_ref_count() > 1; + if (fts_exist) { + fts_optimize_remove_table(t); + purge_sys.stop_FTS(*t); + if (error == DB_SUCCESS) { + error = fts_lock_tables(trx, *t); + } + } else if (pause_purge) { + purge_sys.stop_FTS(); + } } - table->release(); } + dict_stats stats; + bool stats_fail = true; + if (strcmp(norm_from, TABLE_STATS_NAME) && strcmp(norm_from, INDEX_STATS_NAME) && strcmp(norm_to, TABLE_STATS_NAME) && strcmp(norm_to, INDEX_STATS_NAME)) { - table_stats = dict_table_open_on_name(TABLE_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (table_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats = dict_acquire_mdl_shared( - table_stats, thd, &mdl_table); - dict_sys.unfreeze(); - } - index_stats = dict_table_open_on_name(INDEX_STATS_NAME, false, - DICT_ERR_IGNORE_NONE); - if (index_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats = dict_acquire_mdl_shared( - index_stats, thd, &mdl_index); - dict_sys.unfreeze(); - } - - if (error == DB_SUCCESS && table_stats && index_stats - && !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) - && !strcmp(index_stats->name.m_name, INDEX_STATS_NAME)) { - error = lock_table_for_trx(table_stats, trx, LOCK_X, - from_temp); + stats_fail = stats.open(thd); + if (!stats_fail && error == DB_SUCCESS) { + error = lock_table_for_trx(stats.table(), trx, + LOCK_X, from_temp); if (error == DB_SUCCESS) { - error = lock_table_for_trx(index_stats, trx, + error = lock_table_for_trx(stats.index(), trx, LOCK_X, from_temp); } if (error != DB_SUCCESS && from_temp) { @@ -14244,12 +14220,8 @@ we cannot lock the tables, when the table is being renamed from from a temporary name. 
*/ - dict_table_close(table_stats, false, thd, - mdl_table); - dict_table_close(index_stats, false, thd, - mdl_index); - table_stats = nullptr; - index_stats = nullptr; + stats.close(); + stats_fail = true; } } } @@ -14276,7 +14248,7 @@ DEBUG_SYNC(thd, "after_innobase_rename_table"); - if (error == DB_SUCCESS && table_stats && index_stats) { + if (error == DB_SUCCESS && !stats_fail) { error = dict_stats_rename_table(norm_from, norm_to, trx); if (error == DB_DUPLICATE_KEY) { /* The duplicate may also occur in @@ -14289,33 +14261,52 @@ if (error == DB_SUCCESS) { trx->flush_log_later = true; + if (t) { + ut_ad(dict_sys.locked()); + if (fts_exist) { + fts_optimize_add_table(t); + } + if (UNIV_LIKELY(t->release())) { + stats_deinit(t); + } else { + ut_ad("unexpected references" == 0); + } + } innobase_commit_low(trx); } else { + if (t) { + if (fts_exist) { + fts_optimize_add_table(t); + } + t->release(); + } trx->rollback(); } - if (table_stats) { - dict_table_close(table_stats, true, thd, mdl_table); - } - if (index_stats) { - dict_table_close(index_stats, true, thd, mdl_index); - } row_mysql_unlock_data_dictionary(trx); + + if (fts_exist || pause_purge) { + purge_sys.resume_FTS(); + } + if (error == DB_SUCCESS) { log_write_up_to(trx->commit_lsn, true); } trx->flush_log_later = false; trx->free(); + if (!stats_fail) { + stats.close(); + } if (error == DB_DUPLICATE_KEY) { /* We are not able to deal with handler::get_dup_key() during DDL operations, because the duplicate key would exist in metadata tables, not in the user table. */ my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to); - error = DB_ERROR; + DBUG_RETURN(HA_ERR_GENERIC); } else if (error == DB_LOCK_WAIT_TIMEOUT) { my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); - error = DB_LOCK_WAIT; + DBUG_RETURN(HA_ERR_GENERIC); } DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); @@ -14529,7 +14520,7 @@ ulint stat_clustered_index_size; - ut_a(m_prebuilt->table->stat_initialized); + ut_ad(m_prebuilt->table->stat_initialized()); stat_clustered_index_size = m_prebuilt->table->stat_clustered_index_size; @@ -14656,7 +14647,7 @@ rec_per_key_t rec_per_key; ib_uint64_t n_diff; - ut_a(index->table->stat_initialized); + ut_ad(index->table->stat_initialized()); ut_ad(i < dict_index_get_n_unique(index)); ut_ad(!dict_index_is_spatial(index)); @@ -14794,63 +14785,82 @@ ib_table = m_prebuilt->table; DBUG_ASSERT(ib_table->get_ref_count() > 0); - if (!ib_table->is_readable()) { + if (!ib_table->is_readable() + || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { dict_stats_empty_table(ib_table, true); - } - - if (flag & HA_STATUS_TIME) { - if (is_analyze || innobase_stats_on_metadata) { + } else if (flag & HA_STATUS_TIME) { + stats.update_time = ib_table->update_time; + if (!is_analyze && !innobase_stats_on_metadata) { + goto stats_fetch; + } - dict_stats_upd_option_t opt; - dberr_t ret; + dberr_t ret; + m_prebuilt->trx->op_info = "updating table statistics"; - m_prebuilt->trx->op_info = "updating table statistics"; + if (ib_table->stats_is_persistent() + && !srv_read_only_mode + && dict_stats_persistent_storage_check(false) + == SCHEMA_OK) { + if (is_analyze) { + dict_stats_recalc_pool_del(ib_table->id, + false); +recalc: + ret = statistics_init(ib_table, is_analyze); + } else { + /* This is e.g. 
'SHOW INDEXES' */ + ret = statistics_init(ib_table, is_analyze); + switch (ret) { + case DB_SUCCESS: + case DB_READ_ONLY: + break; + default: + goto error; + case DB_STATS_DO_NOT_EXIST: + if (!ib_table + ->stats_is_auto_recalc()) { + break; + } - if (dict_stats_is_persistent_enabled(ib_table)) { - if (is_analyze) { - if (!srv_read_only_mode) { - dict_stats_recalc_pool_del( - ib_table->id, false); + if (opt_bootstrap) { + break; } - opt = DICT_STATS_RECALC_PERSISTENT; - } else { - /* This is e.g. 'SHOW INDEXES', fetch - the persistent stats from disk. */ - opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; +#ifdef WITH_WSREP + if (wsrep_thd_skip_locking( + m_user_thd)) { + break; + } +#endif + is_analyze = true; + goto recalc; } - } else { - opt = DICT_STATS_RECALC_TRANSIENT; } - - ret = dict_stats_update(ib_table, opt); - + } else { + ret = dict_stats_update_transient(ib_table); if (ret != DB_SUCCESS) { +error: m_prebuilt->trx->op_info = ""; DBUG_RETURN(HA_ERR_GENERIC); } - - m_prebuilt->trx->op_info = - "returning various info to MariaDB"; } - - stats.update_time = (ulong) ib_table->update_time; + m_prebuilt->trx->op_info = "returning various info to MariaDB"; + } else { +stats_fetch: + statistics_init(ib_table, false); } - dict_stats_init(ib_table); - if (flag & HA_STATUS_VARIABLE) { ulint stat_clustered_index_size; ulint stat_sum_of_other_index_sizes; - ut_a(ib_table->stat_initialized); - #if !defined NO_ELISION && !defined SUX_LOCK_GENERIC if (xbegin()) { if (ib_table->stats_mutex_is_locked()) xabort(); + ut_ad(ib_table->stat_initialized()); + n_rows = ib_table->stat_n_rows; stat_clustered_index_size @@ -14865,6 +14875,8 @@ { ib_table->stats_shared_lock(); + ut_ad(ib_table->stat_initialized()); + n_rows = ib_table->stat_n_rows; stat_clustered_index_size @@ -14998,7 +15010,7 @@ auto _ = make_scope_exit([ib_table]() { ib_table->stats_shared_unlock(); }); - ut_a(ib_table->stat_initialized); + ut_ad(ib_table->stat_initialized()); for (uint i = 0; i < table->s->keys; i++) { ulong j; @@ -15694,7 +15706,7 @@ << foreign->foreign_table_name; } } else { - dict_table_close(ref_table, true); + ref_table->release(); } } @@ -15852,7 +15864,7 @@ stmt_boundary: trx->bulk_insert_apply(); trx->end_bulk_insert(*m_prebuilt->table); - trx->bulk_insert = false; + trx->bulk_insert &= TRX_DDL_BULK; break; case HA_EXTRA_NO_KEYREAD: (void)check_trx_exists(ha_thd()); @@ -15911,32 +15923,47 @@ break; case HA_EXTRA_END_ALTER_COPY: trx = check_trx_exists(ha_thd()); - if (m_prebuilt->table->skip_alter_undo) { - if (dberr_t err= trx->bulk_insert_apply()) { - m_prebuilt->table->skip_alter_undo = 0; - return convert_error_code_to_mysql( - err, - m_prebuilt->table->flags, - trx->mysql_thd); - } - - trx->end_bulk_insert(*m_prebuilt->table); - trx->bulk_insert = false; - /* During copy alter operation, InnoDB - updates the stats only for non-persistent - tables. */ - if (!dict_stats_is_persistent_enabled( - m_prebuilt->table)) { - dict_stats_update_if_needed( - m_prebuilt->table, *trx); - } + if (!m_prebuilt->table->skip_alter_undo) { + /* This could be invoked inside INSERT...SELECT. + We do not want any extra log writes, because + they could cause a severe performance regression. 
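The surrounding hunks replace assignments of trx->bulk_insert= false with masking operations (&= TRX_DDL_BULK, &= TRX_DML_BULK, = TRX_NO_BULK), which implies bulk_insert has become a small flag set rather than a boolean: ending a statement can clear the DML bit while an ALTER-driven bulk load stays pending. A minimal sketch of that masking idiom with hypothetical enumerator values (the real definitions are in InnoDB headers not shown in this debdiff):

  enum : unsigned char
  {
    TRX_NO_BULK  = 0,       // hypothetical values, for illustration
    TRX_DML_BULK = 1 << 0,
    TRX_DDL_BULK = 1 << 1,
  };

  unsigned char bulk_insert = TRX_DML_BULK | TRX_DDL_BULK;

  // End of a DML statement: keep only a DDL-initiated bulk insert.
  inline void end_dml_statement()
  {
    bulk_insert &= TRX_DDL_BULK;
  }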
*/ + break; } m_prebuilt->table->skip_alter_undo = 0; + if (dberr_t err= trx->bulk_insert_apply()) { + m_prebuilt->table->skip_alter_undo = 0; + return convert_error_code_to_mysql( + err, m_prebuilt->table->flags, + trx->mysql_thd); + } + + trx->end_bulk_insert(*m_prebuilt->table); + trx->bulk_insert &= TRX_DML_BULK; if (!m_prebuilt->table->is_temporary() && !high_level_read_only) { + /* During copy_data_between_tables(), InnoDB only + updates transient statistics. */ + if (!m_prebuilt->table->stats_is_persistent()) { + dict_stats_update_if_needed(m_prebuilt->table, + *trx); + } + /* The extra log write is necessary for + ALTER TABLE...ALGORITHM=COPY, because + a normal transaction commit would be a no-op + because no undo log records were generated. + This log write will also be unnecessarily executed + during CREATE...SELECT, which is the other caller of + handler::extra(HA_EXTRA_BEGIN_ALTER_COPY). */ log_buffer_flush_to_disk(); } break; + case HA_EXTRA_ABORT_ALTER_COPY: + if (m_prebuilt->table->skip_alter_undo) { + trx = check_trx_exists(ha_thd()); + m_prebuilt->table->skip_alter_undo = 0; + trx->rollback(); + } + break; default:/* Do nothing */ ; } @@ -16031,7 +16058,8 @@ break; } - trx->bulk_insert = false; + ut_ad(trx->bulk_insert != TRX_DDL_BULK); + trx->bulk_insert = TRX_NO_BULK; trx->last_stmt_start = trx->undo_no; } @@ -16239,7 +16267,7 @@ if (!trx->bulk_insert) { break; } - trx->bulk_insert = false; + trx->bulk_insert &= TRX_DDL_BULK; trx->last_stmt_start = trx->undo_no; } @@ -17294,7 +17322,12 @@ param_new = info->option_struct; param_old = table->s->option_struct; - innobase_copy_frm_flags_from_create_info(m_prebuilt->table, info); + m_prebuilt->table->stats_mutex_lock(); + if (!m_prebuilt->table->stat_initialized()) { + innobase_copy_frm_flags_from_create_info( + m_prebuilt->table, info); + } + m_prebuilt->table->stats_mutex_unlock(); if (table_changes != IS_EQUAL_YES) { @@ -17383,7 +17416,8 @@ " higher than innodb_io_capacity_max %lu", in_val, srv_max_io_capacity); - srv_max_io_capacity = (in_val & ~(~0UL >> 1)) + /* Avoid overflow. */ + srv_max_io_capacity = (in_val >= SRV_MAX_IO_CAPACITY_LIMIT / 2) ? in_val : in_val * 2; push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, @@ -17546,22 +17580,6 @@ return(ret); } -extern void buf_resize_start(); - -/** Update the system variable innodb_buffer_pool_size using the "saved" -value. This function is registered as a callback with MySQL. -@param[in] save immediate result from check function */ -static -void -innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save) -{ - snprintf(export_vars.innodb_buffer_pool_resize_status, - sizeof(export_vars.innodb_buffer_pool_resize_status), - "Buffer pool resize requested"); - - buf_resize_start(); -} - /** The latest assigned innodb_ft_aux_table name */ static char* innodb_ft_aux_table; @@ -17576,11 +17594,16 @@ int len = sizeof buf; if (const char* table_name = value->val_str(value, buf, &len)) { + /* Because we are not acquiring MDL on the table name, + we must contiguously hold dict_sys.latch while we are + examining the table, to protect us against concurrent DDL. */ + dict_sys.lock(SRW_LOCK_CALL); if (dict_table_t* table = dict_table_open_on_name( - table_name, false, DICT_ERR_IGNORE_NONE)) { + table_name, true, DICT_ERR_IGNORE_NONE)) { + table->release(); const table_id_t id = dict_table_has_fts_index(table) ? 
table->id : 0; - dict_table_close(table); + dict_sys.unlock(); if (id) { innodb_ft_aux_table_id = id; if (table_name == buf) { @@ -17591,12 +17614,12 @@ len); } - *static_cast(save) = table_name; return 0; } + } else { + dict_sys.unlock(); } - return 1; } else { *static_cast(save) = NULL; @@ -18385,14 +18408,14 @@ mysql_mutex_unlock(&buf_pool.mutex); } +static my_bool innodb_log_checkpoint_now; #ifdef UNIV_DEBUG -static my_bool innodb_log_checkpoint_now = TRUE; static my_bool innodb_buf_flush_list_now = TRUE; static uint innodb_merge_threshold_set_all_debug = DICT_INDEX_MERGE_THRESHOLD_DEFAULT; +#endif /** Force an InnoDB log checkpoint. */ -/** Force an InnoDB log checkpoint. */ static void checkpoint_now_set(THD* thd, st_mysql_sys_var*, void*, const void *save) @@ -18416,14 +18439,21 @@ const auto size= log_sys.is_encrypted() ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT; mysql_mutex_unlock(&LOCK_global_system_variables); - lsn_t lsn; - while (log_sys.last_checkpoint_lsn.load(std::memory_order_acquire) + size < - (lsn= log_sys.get_lsn(std::memory_order_acquire))) + while (!thd_kill_level(thd)) + { + log_sys.latch.wr_lock(SRW_LOCK_CALL); + lsn_t cp= log_sys.last_checkpoint_lsn.load(std::memory_order_relaxed), + lsn= log_sys.get_lsn(); + log_sys.latch.wr_unlock(); + if (cp + size >= lsn) + break; log_make_checkpoint(); + } mysql_mutex_lock(&LOCK_global_system_variables); } +#ifdef UNIV_DEBUG /****************************************************************//** Force a dirty pages flush now. */ static @@ -18605,7 +18635,7 @@ " innodb_log_buffer_size=%u", MYF(0), log_sys.buf_size); else { - switch (log_sys.resize_start(*static_cast(save))) { + switch (log_sys.resize_start(*static_cast(save), thd)) { case log_t::RESIZE_NO_CHANGE: break; case log_t::RESIZE_IN_PROGRESS: @@ -18617,12 +18647,11 @@ ib_senderrf(thd, IB_LOG_LEVEL_ERROR, ER_CANT_CREATE_HANDLER_FILE); break; case log_t::RESIZE_STARTED: - const lsn_t start{log_sys.resize_in_progress()}; for (timespec abstime;;) { if (thd_kill_level(thd)) { - log_sys.resize_abort(); + log_sys.resize_abort(thd); break; } @@ -18637,37 +18666,25 @@ resizing= log_sys.resize_in_progress(); } mysql_mutex_unlock(&buf_pool.flush_list_mutex); - if (start > log_sys.get_lsn()) + if (!resizing || !log_sys.resize_running(thd)) + break; + log_sys.latch.wr_lock(SRW_LOCK_CALL); + while (resizing > log_sys.get_lsn()) { ut_ad(!log_sys.is_mmap()); /* The server is almost idle. Write dummy FILE_CHECKPOINT records to ensure that the log resizing will complete. */ - log_sys.latch.wr_lock(SRW_LOCK_CALL); - while (start > log_sys.get_lsn()) - { - mtr_t mtr; - mtr.start(); - mtr.commit_files(log_sys.last_checkpoint_lsn); - } - log_sys.latch.wr_unlock(); + mtr_t mtr; + mtr.start(); + mtr.commit_files(log_sys.last_checkpoint_lsn); } - if (!resizing || resizing > start /* only wait for our resize */) - break; + log_sys.latch.wr_unlock(); } } } mysql_mutex_lock(&LOCK_global_system_variables); } -static void innodb_log_spin_wait_delay_update(THD *, st_mysql_sys_var*, - void *, const void *save) -{ - log_sys.latch.wr_lock(SRW_LOCK_CALL); - mtr_t::spin_wait_delay= *static_cast(save); - mtr_t::finisher_update(); - log_sys.latch.wr_unlock(); -} - /** Update innodb_status_output or innodb_status_output_locks, which control InnoDB "status monitor" output to the error log. @param[out] var current value @@ -18987,7 +19004,7 @@ static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity, PLUGIN_VAR_RQCMDARG, "Number of IOPs the server can do. 
Tunes the background IO rate", - NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0); + NULL, innodb_io_capacity_update, 200, 100, SRV_MAX_IO_CAPACITY_LIMIT, 0); static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity, PLUGIN_VAR_RQCMDARG, @@ -18996,12 +19013,12 @@ SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100, SRV_MAX_IO_CAPACITY_LIMIT, 0); -#ifdef UNIV_DEBUG static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now, PLUGIN_VAR_OPCMDARG, - "Force checkpoint now", + "Write back dirty pages from the buffer pool and update the log checkpoint", NULL, checkpoint_now_set, FALSE); +#ifdef UNIV_DEBUG static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now, PLUGIN_VAR_OPCMDARG, "Force dirty page flush now", @@ -19157,12 +19174,12 @@ " SHOW TABLE STATUS for tables that use transient statistics (off by default)", NULL, NULL, FALSE); -static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages, +static MYSQL_SYSVAR_UINT(stats_transient_sample_pages, srv_stats_transient_sample_pages, PLUGIN_VAR_RQCMDARG, "The number of leaf index pages to sample when calculating transient" " statistics (if persistent statistics are not used, default 8)", - NULL, NULL, 8, 1, ~0ULL, 0); + NULL, NULL, 8, 1, ~0U, 0); static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent, PLUGIN_VAR_OPCMDARG, @@ -19178,12 +19195,12 @@ " new statistics)", NULL, NULL, TRUE); -static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages, +static MYSQL_SYSVAR_UINT(stats_persistent_sample_pages, srv_stats_persistent_sample_pages, PLUGIN_VAR_RQCMDARG, "The number of leaf index pages to sample when calculating persistent" " statistics (by ANALYZE, default 20)", - NULL, NULL, 20, 1, ~0ULL, 0); + NULL, NULL, 20, 1, ~0U, 0); static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter, PLUGIN_VAR_RQCMDARG, @@ -19222,11 +19239,12 @@ "Data file autoextend increment in megabytes", NULL, NULL, 64, 1, 1000, 0); -static MYSQL_SYSVAR_SIZE_T(buffer_pool_chunk_size, srv_buf_pool_chunk_unit, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Size of a single memory chunk" - " for resizing buffer pool. Online buffer pool resizing happens at this" - " granularity. 
0 means autosize this variable based on buffer pool size.", +static size_t innodb_buffer_pool_chunk_size; + +static MYSQL_SYSVAR_SIZE_T(buffer_pool_chunk_size, + innodb_buffer_pool_chunk_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_DEPRECATED, + "Deprecated parameter with no effect", NULL, NULL, 0, 0, SIZE_T_MAX, 1024 * 1024); @@ -19525,11 +19543,12 @@ nullptr, innodb_log_file_size_update, 96 << 20, 4 << 20, std::numeric_limits::max(), 4096); -static MYSQL_SYSVAR_UINT(log_spin_wait_delay, mtr_t::spin_wait_delay, - PLUGIN_VAR_OPCMDARG, - "Delay between log buffer spin lock polls (0 to use a blocking latch)", - nullptr, innodb_log_spin_wait_delay_update, - 0, 0, 6000, 0); +static uint innodb_log_spin_wait_delay; + +static MYSQL_SYSVAR_UINT(log_spin_wait_delay, innodb_log_spin_wait_delay, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_DEPRECATED, + "Deprecated parameter with no effect", + nullptr, nullptr, 0, 0, 6000, 0); static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct, PLUGIN_VAR_RQCMDARG, @@ -19634,37 +19653,10 @@ AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ AUTOINC_NO_LOCKING, 0); /* Maximum value */ -#ifdef HAVE_URING -# include -static utsname uname_for_io_uring; -#else -static -#endif -bool innodb_use_native_aio_default() -{ -#ifdef HAVE_URING - utsname &u= uname_for_io_uring; - if (!uname(&u) && u.release[0] == '5' && u.release[1] == '.' && - u.release[2] == '1' && u.release[3] >= '1' && u.release[3] <= '5' && - u.release[4] == '.') - { - if (u.release[3] == '5') { - const char *s= strstr(u.version, "5.15."); - if (s || (s= strstr(u.release, "5.15."))) - if ((s[5] >= '3' || s[6] >= '0')) - return true; /* 5.15.3 and later should be fine */ - } - io_uring_may_be_unsafe= u.release; - return false; /* working around io_uring hangs (MDEV-26674) */ - } -#endif - return true; -} - static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Use native AIO if supported on this platform.", - NULL, NULL, innodb_use_native_aio_default()); + NULL, NULL, TRUE); #ifdef HAVE_LIBNUMA static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, @@ -19953,6 +19945,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), +#if defined __linux__ || !defined DBUG_OFF + MYSQL_SYSVAR(buffer_pool_size_auto_min), +#endif + MYSQL_SYSVAR(buffer_pool_size_max), MYSQL_SYSVAR(buffer_pool_chunk_size), MYSQL_SYSVAR(buffer_pool_filename), MYSQL_SYSVAR(buffer_pool_dump_now), @@ -20079,8 +20075,8 @@ MYSQL_SYSVAR(monitor_reset_all), MYSQL_SYSVAR(purge_threads), MYSQL_SYSVAR(purge_batch_size), -#ifdef UNIV_DEBUG MYSQL_SYSVAR(log_checkpoint_now), +#ifdef UNIV_DEBUG MYSQL_SYSVAR(buf_flush_list_now), MYSQL_SYSVAR(merge_threshold_set_all_debug), #endif /* UNIV_DEBUG */ @@ -21057,90 +21053,6 @@ cs2, to, static_cast(len), errors))); } -/** Validate the requested buffer pool size. Also, reserve the necessary -memory needed for buffer pool resize. -@param[in] thd thread handle -@param[out] save immediate result for update function -@param[in] value incoming string -@return 0 on success, 1 on failure. 
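The innodb_buffer_pool_size_validate() function removed here relied on buf_pool_size_align() (also deleted just below) to round a requested size up to a multiple of srv_buf_pool_chunk_unit, while the new startup code near the top of this file rounds with ut_calc_align() against 1 MiB and extent-size boundaries. A minimal sketch of align-up for the power-of-two case, which is what the 1 MiB alignment amounts to (the generic-multiple case is (size + m - 1) / m * m):

  #include <cstddef>

  // Round size up to a multiple of align; align must be a power of two.
  static size_t align_up(size_t size, size_t align)
  {
    return (size + align - 1) & ~(align - 1);
  }

  // align_up((5u << 20) + 123, 1u << 20) == 6u << 20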
-*/ -static -int -innodb_buffer_pool_size_validate( - THD* thd, - st_mysql_sys_var*, - void* save, - struct st_mysql_value* value) -{ - longlong intbuf; - - value->val_int(value, &intbuf); - - if (static_cast(intbuf) < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "innodb_buffer_pool_size must be at least" - " %lld for innodb_page_size=%lu", - MYSQL_SYSVAR_NAME(buffer_pool_size).min_val, - srv_page_size); - return(1); - } - - if (!srv_was_started) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Cannot update innodb_buffer_pool_size," - " because InnoDB is not started."); - return(1); - } - - mysql_mutex_lock(&buf_pool.mutex); - - if (srv_buf_pool_old_size != srv_buf_pool_size) { - mysql_mutex_unlock(&buf_pool.mutex); - my_printf_error(ER_WRONG_ARGUMENTS, - "Another buffer pool resize is already in progress.", MYF(0)); - return(1); - } - - ulint requested_buf_pool_size = buf_pool_size_align(ulint(intbuf)); - - *static_cast(save) = requested_buf_pool_size; - - if (srv_buf_pool_size == ulint(intbuf)) { - mysql_mutex_unlock(&buf_pool.mutex); - /* nothing to do */ - return(0); - } - - if (srv_buf_pool_size == requested_buf_pool_size) { - mysql_mutex_unlock(&buf_pool.mutex); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "innodb_buffer_pool_size must be at least" - " innodb_buffer_pool_chunk_size=%zu", - srv_buf_pool_chunk_unit); - /* nothing to do */ - return(0); - } - - srv_buf_pool_size = requested_buf_pool_size; - mysql_mutex_unlock(&buf_pool.mutex); - - if (intbuf != static_cast(requested_buf_pool_size)) { - char buf[64]; - int len = 64; - value->val_str(value, buf, &len); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_TRUNCATED_WRONG_VALUE, - "Truncated incorrect %-.32s value: '%-.128s'", - mysql_sysvar_buffer_pool_size.name, - value->val_str(value, buf, &len)); - } - - return(0); -} - /*************************************************************//** Check for a valid value of innobase_compression_algorithm. @return 0 for valid innodb_compression_algorithm. */ @@ -21436,19 +21348,3 @@ if (UNIV_LIKELY_NULL(local_heap)) mem_heap_free(local_heap); } - -/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, -if needed. 
-@param[in] size size in bytes -@return aligned size */ -ulint buf_pool_size_align(ulint size) noexcept -{ - const size_t m = srv_buf_pool_chunk_unit; - size = ut_max(size, (size_t) MYSQL_SYSVAR_NAME(buffer_pool_size).min_val); - - if (size % m == 0) { - return(size); - } else { - return (size / m + 1) * m; - } -} diff -Nru mariadb-10.11.11/storage/innobase/handler/ha_innodb.h mariadb-10.11.13/storage/innobase/handler/ha_innodb.h --- mariadb-10.11.11/storage/innobase/handler/ha_innodb.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/handler/ha_innodb.h 2025-05-19 16:14:25.000000000 +0000 @@ -101,6 +101,9 @@ int open(const char *name, int mode, uint test_if_locked) override; + /** Fetch or recalculate InnoDB table statistics */ + dberr_t statistics_init(dict_table_t *table, bool recalc); + handler* clone(const char *name, MEM_ROOT *mem_root) override; int close(void) override; diff -Nru mariadb-10.11.11/storage/innobase/handler/handler0alter.cc mariadb-10.11.13/storage/innobase/handler/handler0alter.cc --- mariadb-10.11.11/storage/innobase/handler/handler0alter.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/handler/handler0alter.cc 2025-05-19 16:14:25.000000000 +0000 @@ -621,6 +621,16 @@ } dict_index_t* index = dict_table_get_first_index(this); + if (instant) { + instant->field_map= static_cast( + mem_heap_dup(heap, instant->field_map, + (index->n_fields - + index->first_user_field()) * + sizeof *instant->field_map)); + instant= static_cast( + mem_heap_dup(heap, instant, sizeof *instant)); + } + bool metadata_changed; { const dict_index_t& i = *dict_table_get_first_index(&table); @@ -2241,6 +2251,12 @@ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } + if (ha_alter_info->create_info->used_fields + & HA_CREATE_USED_SEQUENCE) { + ha_alter_info->unsupported_reason = "SEQUENCE"; + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + update_thd(); if (!m_prebuilt->table->space) { @@ -5525,6 +5541,12 @@ return false; } + DBUG_EXECUTE_IF("instant_insert_fail", + my_error(ER_INTERNAL_ERROR, MYF(0), + "InnoDB: Insert into SYS_COLUMNS failed"); + mem_heap_free(info->heap); + return true;); + if (DB_SUCCESS != que_eval_sql( info, "PROCEDURE ADD_COL () IS\n" @@ -6512,6 +6534,8 @@ DBUG_ASSERT(!ctx->add_index); DBUG_ASSERT(!ctx->add_key_numbers); DBUG_ASSERT(!ctx->num_to_add_index); + DBUG_ASSERT(!(ha_alter_info->create_info->used_fields + & HA_CREATE_USED_SEQUENCE)); user_table = ctx->new_table; @@ -6611,8 +6635,9 @@ mem_heap_alloc(ctx->heap, ctx->num_to_add_index * sizeof *ctx->add_key_numbers)); - const bool fts_exist = ctx->new_table->flags2 + const bool have_fts = user_table->flags2 & (DICT_TF2_FTS_HAS_DOC_ID | DICT_TF2_FTS); + const bool pause_purge = have_fts || user_table->get_ref_count() > 1; /* Acquire a lock on the table before creating any indexes. */ bool table_lock_failed = false; @@ -6639,13 +6664,18 @@ user_table->lock_shared_unlock(); } - if (fts_exist) { - purge_sys.stop_FTS(*ctx->new_table); + if (pause_purge) { + purge_sys.stop_FTS(); + if (have_fts) { + purge_sys.stop_FTS(*user_table, true); + } if (error == DB_SUCCESS) { - error = fts_lock_tables(ctx->trx, *ctx->new_table); + error = fts_lock_tables(ctx->trx, *user_table); } } + ut_ad(user_table->get_ref_count() == 1); + if (error == DB_SUCCESS) { error = lock_sys_tables(ctx->trx); } @@ -7478,7 +7508,7 @@ /* fts_create_common_tables() may drop old common tables, whose files would be deleted here. 
*/ commit_unlock_and_unlink(ctx->trx); - if (fts_exist) { + if (pause_purge) { purge_sys.resume_FTS(); } @@ -7542,10 +7572,11 @@ } } - /* n_ref_count must be 1, because background threads cannot + /* n_ref_count must be 1 (+ InnoDB_share), + because background threads cannot be executing on this very table as we are holding MDL_EXCLUSIVE. */ - ut_ad(ctx->online || user_table->get_ref_count() == 1); + ut_ad(ctx->online || ((user_table->get_ref_count() - 1) <= 1)); if (new_clustered) { online_retry_drop_indexes_low(user_table, ctx->trx); @@ -7574,7 +7605,7 @@ ctx->trx->free(); } trx_commit_for_mysql(ctx->prebuilt->trx); - if (fts_exist) { + if (pause_purge) { purge_sys.resume_FTS(); } @@ -11180,7 +11211,10 @@ DBUG_ENTER("alter_stats_norebuild"); DBUG_ASSERT(!ctx->need_rebuild()); - if (!dict_stats_is_persistent_enabled(ctx->new_table)) { + auto stat = ctx->new_table->stat; + + if (!dict_table_t::stat_initialized(stat) + || !dict_table_t::stats_is_persistent(stat)) { DBUG_VOID_RETURN; } @@ -11189,7 +11223,6 @@ DBUG_ASSERT(index->table == ctx->new_table); if (!(index->type & DICT_FTS)) { - dict_stats_init(ctx->new_table); dict_stats_update_for_index(index); } } @@ -11214,12 +11247,15 @@ { DBUG_ENTER("alter_stats_rebuild"); - if (!table->space - || !dict_stats_is_persistent_enabled(table)) { + if (!table->space || !table->stats_is_persistent() + || dict_stats_persistent_storage_check(false) != SCHEMA_OK) { DBUG_VOID_RETURN; } - dberr_t ret = dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT); + dberr_t ret = dict_stats_update_persistent(table); + if (ret == DB_SUCCESS) { + ret = dict_stats_save(table); + } if (ret != DB_SUCCESS) { push_warning_printf( @@ -11332,6 +11368,13 @@ /* A rollback is being requested. So far we may at most have created stubs for ADD INDEX or a copy of the table for rebuild. */ +#if 0 /* FIXME: is there a better way for innodb.innodb-index-online? 
*/ + lock_shared_ha_data(); + auto share = static_cast(get_ha_share_ptr()); + set_ha_share_ptr(nullptr); + unlock_shared_ha_data(); + delete share; +#endif DBUG_RETURN(rollback_inplace_alter_table( ha_alter_info, table, m_prebuilt)); } @@ -11559,34 +11602,16 @@ } } - dict_table_t *table_stats = nullptr, *index_stats = nullptr; - MDL_ticket *mdl_table = nullptr, *mdl_index = nullptr; + dict_stats stats; + bool stats_failed = true; dberr_t error = DB_SUCCESS; if (!ctx0->old_table->is_stats_table() && !ctx0->new_table->is_stats_table()) { - table_stats = dict_table_open_on_name( - TABLE_STATS_NAME, false, DICT_ERR_IGNORE_NONE); - if (table_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - table_stats = dict_acquire_mdl_shared( - table_stats, m_user_thd, &mdl_table); - dict_sys.unfreeze(); - } - index_stats = dict_table_open_on_name( - INDEX_STATS_NAME, false, DICT_ERR_IGNORE_NONE); - if (index_stats) { - dict_sys.freeze(SRW_LOCK_CALL); - index_stats = dict_acquire_mdl_shared( - index_stats, m_user_thd, &mdl_index); - dict_sys.unfreeze(); - } - - if (table_stats && index_stats - && !strcmp(table_stats->name.m_name, TABLE_STATS_NAME) - && !strcmp(index_stats->name.m_name, INDEX_STATS_NAME) - && !(error = lock_table_for_trx(table_stats, + stats_failed = stats.open(m_user_thd); + if (!stats_failed + && !(error = lock_table_for_trx(stats.table(), trx, LOCK_X))) { - error = lock_table_for_trx(index_stats, trx, LOCK_X); + error = lock_table_for_trx(stats.index(), trx, LOCK_X); } } @@ -11600,15 +11625,9 @@ error = lock_sys_tables(trx); } if (error != DB_SUCCESS) { - if (table_stats) { - dict_table_close(table_stats, false, m_user_thd, - mdl_table); - } - if (index_stats) { - dict_table_close(index_stats, false, m_user_thd, - mdl_index); + if (!stats_failed) { + stats.close(); } - my_error_innodb(error, table_share->table_name.str, 0); if (fts_exist) { purge_sys.resume_FTS(); } @@ -11624,6 +11643,7 @@ trx_start_for_ddl(trx); } + my_error_innodb(error, table_share->table_name.str, 0); DBUG_RETURN(true); } @@ -11641,15 +11661,10 @@ fail: trx->rollback(); ut_ad(!trx->fts_trx); - if (table_stats) { - dict_table_close(table_stats, true, m_user_thd, - mdl_table); - } - if (index_stats) { - dict_table_close(index_stats, true, m_user_thd, - mdl_index); - } row_mysql_unlock_data_dictionary(trx); + if (!stats_failed) { + stats.close(); + } if (fts_exist) { purge_sys.resume_FTS(); } @@ -11669,14 +11684,14 @@ if (commit_try_rebuild(ha_alter_info, ctx, altered_table, table, - table_stats && index_stats, + !stats_failed, trx, table_share->table_name.str)) { goto fail; } } else if (commit_try_norebuild(ha_alter_info, ctx, altered_table, table, - table_stats && index_stats, + !stats_failed, trx, table_share->table_name.str)) { goto fail; @@ -11699,13 +11714,6 @@ #endif } - if (table_stats) { - dict_table_close(table_stats, true, m_user_thd, mdl_table); - } - if (index_stats) { - dict_table_close(index_stats, true, m_user_thd, mdl_index); - } - /* Commit or roll back the changes to the data dictionary. 
*/ DEBUG_SYNC(m_user_thd, "innodb_alter_inplace_before_commit"); @@ -11854,6 +11862,9 @@ DBUG_EXECUTE_IF("innodb_alter_commit_crash_after_commit", DBUG_SUICIDE();); trx->free(); + if (!stats_failed) { + stats.close(); + } if (fts_exist) { purge_sys.resume_FTS(); } @@ -11910,6 +11921,9 @@ DBUG_EXECUTE_IF("innodb_alter_commit_crash_after_commit", DBUG_SUICIDE();); trx->free(); + if (!stats_failed) { + stats.close(); + } if (fts_exist) { purge_sys.resume_FTS(); } diff -Nru mariadb-10.11.11/storage/innobase/handler/i_s.cc mariadb-10.11.13/storage/innobase/handler/i_s.cc --- mariadb-10.11.11/storage/innobase/handler/i_s.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/handler/i_s.cc 2025-05-19 16:14:25.000000000 +0000 @@ -2230,7 +2230,7 @@ DBUG_RETURN(0); } else if (!dict_table_has_fts_index(user_table) || !user_table->is_readable()) { - dict_table_close(user_table, false, thd, mdl_ticket); + dict_table_close(user_table, thd, mdl_ticket); DBUG_RETURN(0); } @@ -2245,7 +2245,7 @@ fts_table_fetch_doc_ids(trx, &fts_table, deleted); - dict_table_close(user_table, false, thd, mdl_ticket); + dict_table_close(user_table, thd, mdl_ticket); trx->free(); @@ -2578,7 +2578,7 @@ } if (!user_table->fts || !user_table->fts->cache) { - dict_table_close(user_table, false, thd, mdl_ticket); + dict_table_close(user_table, thd, mdl_ticket); DBUG_RETURN(0); } @@ -2603,7 +2603,7 @@ } mysql_mutex_unlock(&cache->lock); - dict_table_close(user_table, false, thd, mdl_ticket); + dict_table_close(user_table, thd, mdl_ticket); DBUG_RETURN(ret); } @@ -3020,7 +3020,7 @@ } } - dict_table_close(user_table, false, thd, mdl_ticket); + dict_table_close(user_table, thd, mdl_ticket); ut_free(conv_str.f_str); @@ -3145,7 +3145,7 @@ } if (!dict_table_has_fts_index(user_table)) { - dict_table_close(user_table, false, thd, mdl_ticket); + dict_table_close(user_table, thd, mdl_ticket); DBUG_RETURN(0); } @@ -3202,7 +3202,7 @@ fts_sql_commit(trx); - dict_table_close(user_table, false, thd, mdl_ticket); + dict_table_close(user_table, thd, mdl_ticket); trx->free(); @@ -3388,7 +3388,7 @@ DBUG_RETURN(0); } - buf_stats_get_pool_info(&info); + buf_pool.get_info(&info); table = tables->table; @@ -3937,87 +3937,37 @@ @return 0 on success, 1 on failure */ static int i_s_innodb_buffer_page_fill(THD *thd, TABLE_LIST *tables, Item *) { - int status = 0; - mem_heap_t* heap; - - DBUG_ENTER("i_s_innodb_buffer_page_fill"); - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str); - - /* deny access to user without PROCESS privilege */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(10000); - - for (ulint n = 0; - n < ut_min(buf_pool.n_chunks, buf_pool.n_chunks_new); n++) { - const buf_block_t* block; - ulint n_blocks; - buf_page_info_t* info_buffer; - ulint num_page; - ulint mem_size; - ulint chunk_size; - ulint num_to_process = 0; - ulint block_id = 0; - - /* Get buffer block of the nth chunk */ - block = buf_pool.chunks[n].blocks; - chunk_size = buf_pool.chunks[n].size; - num_page = 0; - - while (chunk_size > 0) { - /* we cache maximum MAX_BUF_INFO_CACHED number of - buffer page info */ - num_to_process = ut_min(chunk_size, - (ulint)MAX_BUF_INFO_CACHED); - - mem_size = num_to_process * sizeof(buf_page_info_t); - - /* For each chunk, we'll pre-allocate information - structures to cache the page information read from - the buffer pool. Doing so before obtain any mutex */ - info_buffer = (buf_page_info_t*) mem_heap_zalloc( - heap, mem_size); - - /* Obtain appropriate mutexes. 
Since this is diagnostic - buffer pool info printout, we are not required to - preserve the overall consistency, so we can - release mutex periodically */ - mysql_mutex_lock(&buf_pool.mutex); - - /* GO through each block in the chunk */ - for (n_blocks = num_to_process; n_blocks--; block++) { - i_s_innodb_buffer_page_get_info( - &block->page, block_id, - info_buffer + num_page); - block_id++; - num_page++; - } - - mysql_mutex_unlock(&buf_pool.mutex); + DBUG_ENTER("i_s_innodb_buffer_page_fill"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str); - /* Fill in information schema table with information - just collected from the buffer chunk scan */ - status = i_s_innodb_buffer_page_fill( - thd, tables, info_buffer, - num_page); - - /* If something goes wrong, break and return */ - if (status) { - break; - } - - mem_heap_empty(heap); - chunk_size -= num_to_process; - num_page = 0; - } - } - - mem_heap_free(heap); - - DBUG_RETURN(status); + /* deny access to user without PROCESS privilege */ + if (check_global_access(thd, PROCESS_ACL)) + DBUG_RETURN(0); + + int status; + buf_page_info_t *b= + static_cast<buf_page_info_t*>(my_malloc(PSI_INSTRUMENT_ME, + MAX_BUF_INFO_CACHED * sizeof *b, + MYF(MY_WME))); + if (!b) + DBUG_RETURN(1); + for (size_t j= 0;;) + { + memset((void*) b, 0, MAX_BUF_INFO_CACHED * sizeof *b); + mysql_mutex_lock(&buf_pool.mutex); + const size_t N= buf_pool.curr_size(); + const size_t n= std::min<size_t>(N, MAX_BUF_INFO_CACHED); + for (size_t i= 0; i < n && j < N; i++, j++) + i_s_innodb_buffer_page_get_info(&buf_pool.get_nth_page(j)->page, j, + &b[i]); + + mysql_mutex_unlock(&buf_pool.mutex); + status= i_s_innodb_buffer_page_fill(thd, tables, b, n); + if (status || j >= N) + break; + } + my_free(b); + DBUG_RETURN(status); } /*******************************************************************//** @@ -4777,9 +4727,9 @@ OK(field_store_string(fields[SYS_TABLESTATS_NAME], table->name.m_name)); - OK(fields[SYS_TABLESTATS_INIT]->store(table->stat_initialized, true)); + OK(fields[SYS_TABLESTATS_INIT]->store(table->stat_initialized(), true)); - if (table->stat_initialized) + if (table->stat_initialized()) { OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows, true)); diff -Nru mariadb-10.11.11/storage/innobase/ibuf/ibuf0ibuf.cc mariadb-10.11.13/storage/innobase/ibuf/ibuf0ibuf.cc --- mariadb-10.11.11/storage/innobase/ibuf/ibuf0ibuf.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/ibuf/ibuf0ibuf.cc 2025-05-19 16:14:25.000000000 +0000 @@ -375,7 +375,7 @@ ibuf.free_list_len = flst_get_len(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST); - ibuf.height = 1 + btr_page_get_level(root); + ibuf.height = uint8_t(1 + btr_page_get_level(root)); /* the '1 +' is the ibuf header page */ ibuf.size = ibuf.seg_size - (1 + ibuf.free_list_len); @@ -443,18 +443,11 @@ goto err_exit; } - /* At startup we intialize ibuf to have a maximum of - CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the - buffer pool size. Once ibuf struct is initialized this - value is updated with the user supplied size by calling - ibuf_max_size_update().
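The i_s_innodb_buffer_page_fill() rewrite above replaces the per-chunk scan with a single pass over the pool indexed by get_nth_page(), copying at most MAX_BUF_INFO_CACHED descriptors per acquisition of buf_pool.mutex and emitting rows only after the mutex is released. A minimal standalone sketch of that copy-then-publish pattern, with purely illustrative names (only the pattern mirrors the new code):

    #include <algorithm>
    #include <cstddef>
    #include <mutex>
    #include <vector>

    struct page_info { /* flattened copy of one page descriptor */ };

    // size() and copy(j) stand in for buf_pool.curr_size() and
    // i_s_innodb_buffer_page_get_info(); emit() stands in for the row output.
    template<typename Size, typename Copy, typename Emit>
    int fill_in_batches(Size size, Copy copy, Emit emit, std::mutex &pool_mutex)
    {
      constexpr size_t BATCH= 10000;      // plays the role of MAX_BUF_INFO_CACHED
      std::vector<page_info> buf(BATCH);
      size_t j= 0, total;
      do
      {
        size_t n= 0;
        pool_mutex.lock();                // short critical section per batch
        total= size();                    // the pool may shrink between batches
        while (n < BATCH && j < total)
          buf[n++]= copy(j++);            // snapshot descriptors under the mutex
        pool_mutex.unlock();
        if (int err= emit(buf.data(), n)) // slow row output without the mutex
          return err;
      } while (j < total);
      return 0;
    }

Because only a snapshot is copied under the mutex, the output is not a consistent view of the pool, which is acceptable for this diagnostic table.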
*/ - ibuf.max_size = ((buf_pool_get_curr_size() >> srv_page_size_shift) - * CHANGE_BUFFER_DEFAULT_SIZE) / 100; - mysql_mutex_init(ibuf_mutex_key, &ibuf_mutex, nullptr); mysql_mutex_init(ibuf_pessimistic_insert_mutex_key, &ibuf_pessimistic_insert_mutex, nullptr); + ibuf_max_size_update(CHANGE_BUFFER_DEFAULT_SIZE); mysql_mutex_lock(&ibuf_mutex); ibuf_size_update(root); mysql_mutex_unlock(&ibuf_mutex); @@ -506,10 +499,10 @@ percentage of the buffer pool size */ { if (UNIV_UNLIKELY(!ibuf.index)) return; - ulint new_size = ((buf_pool_get_curr_size() >> srv_page_size_shift) - * new_val) / 100; + ulint new_size = std::min( + buf_pool.curr_size() * new_val / 100, uint32_t(~0U)); mysql_mutex_lock(&ibuf_mutex); - ibuf.max_size = new_size; + ibuf.max_size = uint32_t(new_size); mysql_mutex_unlock(&ibuf_mutex); } @@ -2061,8 +2054,7 @@ } } - limit = ut_min(IBUF_MAX_N_PAGES_MERGED, - buf_pool_get_curr_size() / 4); + limit = std::min(IBUF_MAX_N_PAGES_MERGED, buf_pool.curr_size() / 4); first_page_no = ibuf_rec_get_page_no(mtr, rec); first_space_id = ibuf_rec_get_space(mtr, rec); @@ -4483,17 +4475,17 @@ return; } - const ulint size= ibuf.size; - const ulint free_list_len= ibuf.free_list_len; - const ulint seg_size= ibuf.seg_size; + const uint32_t size= ibuf.size; + const uint32_t free_list_len= ibuf.free_list_len; + const uint32_t seg_size= ibuf.seg_size; mysql_mutex_unlock(&ibuf_mutex); fprintf(file, "-------------\n" "INSERT BUFFER\n" "-------------\n" - "size " ULINTPF ", free list len " ULINTPF "," - " seg size " ULINTPF ", " ULINTPF " merges\n", + "size %" PRIu32 ", free list len %" PRIu32 "," + " seg size %" PRIu32 ", " ULINTPF " merges\n", size, free_list_len, seg_size, ulint{ibuf.n_merges}); ibuf_print_ops("merged operations:\n", ibuf.n_merged_ops, file); ibuf_print_ops("discarded operations:\n", ibuf.n_discarded_ops, file); diff -Nru mariadb-10.11.11/storage/innobase/include/btr0sea.h mariadb-10.11.13/storage/innobase/include/btr0sea.h --- mariadb-10.11.11/storage/innobase/include/btr0sea.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/btr0sea.h 2025-05-19 16:14:25.000000000 +0000 @@ -39,12 +39,16 @@ #define btr_search_sys_create() btr_search_sys.create() #define btr_search_sys_free() btr_search_sys.free() -/** Disable the adaptive hash search system and empty the index. */ -void btr_search_disable(); +/** Lazily free detached metadata when removing the last reference. */ +ATTRIBUTE_COLD void btr_search_lazy_free(dict_index_t *index); + +/** Disable the adaptive hash search system and empty the index. +@return whether the adaptive hash index was enabled */ +ATTRIBUTE_COLD bool btr_search_disable(); /** Enable the adaptive hash search system. @param resize whether buf_pool_t::resize() is the caller */ -void btr_search_enable(bool resize= false); +ATTRIBUTE_COLD void btr_search_enable(bool resize= false); /*********************************************************************//** Updates the search info. 
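The btr0sea.h hunk above changes btr_search_disable() to report whether the adaptive hash index had been enabled, which lets a caller restore the previous state afterwards. A hedged sketch of such a save-and-restore use; the actual call sites are not part of this hunk:

    // Illustrative only: pause the adaptive hash index around maintenance
    // work (for example buffer pool resizing) and restore its prior state.
    static void with_ahi_paused()
    {
      const bool ahi_was_enabled= btr_search_disable();
      /* ... work that must not race with the adaptive hash index ... */
      if (ahi_was_enabled)
        btr_search_enable(/*resize=*/true);
    }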
*/ diff -Nru mariadb-10.11.11/storage/innobase/include/buf0buddy.h mariadb-10.11.13/storage/innobase/include/buf0buddy.h --- mariadb-10.11.11/storage/innobase/include/buf0buddy.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/buf0buddy.h 2025-05-19 16:14:25.000000000 +0000 @@ -24,17 +24,13 @@ Created December 2006 by Marko Makela *******************************************************/ -#ifndef buf0buddy_h -#define buf0buddy_h - +#pragma once #include "buf0types.h" /** @param[in] block size in bytes @return index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -inline -ulint -buf_buddy_get_slot(ulint size) +inline ulint buf_buddy_get_slot(ulint size) noexcept { ulint i; ulint s; @@ -53,13 +49,13 @@ @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -byte *buf_buddy_alloc_low(ulint i, bool *lru) MY_ATTRIBUTE((malloc)); +byte *buf_buddy_alloc_low(ulint i, bool *lru) noexcept MY_ATTRIBUTE((malloc)); /** Allocate a ROW_FORMAT=COMPRESSED block. @param size compressed page size in bytes @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) +inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) noexcept { return buf_buddy_alloc_low(buf_buddy_get_slot(size), lru); } @@ -68,24 +64,26 @@ @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -void buf_buddy_free_low(void* buf, ulint i); +void buf_buddy_free_low(void* buf, ulint i) noexcept; /** Deallocate a block. @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] size block size in bytes */ -inline void buf_buddy_free(void* buf, ulint size) +inline void buf_buddy_free(void* buf, ulint size) noexcept { - buf_buddy_free_low(buf, buf_buddy_get_slot(size)); + buf_buddy_free_low(buf, buf_buddy_get_slot(size)); } -/** Try to reallocate a block. -@param[in] buf block to be reallocated, must be pointed -to by the buffer pool -@param[in] size block size, up to srv_page_size -@retval false if failed because of no free blocks. */ -bool buf_buddy_realloc(void* buf, ulint size); - -/** Combine all pairs of free buddies. */ -void buf_buddy_condense_free(); -#endif /* buf0buddy_h */ +ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::shrink(). +@param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page +@param block uncompressed block for storage +@return block +@retval nullptr if the block was consumed */ +ATTRIBUTE_COLD +buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept; + +/** Combine all pairs of free buddies. 
+@param size the target innodb_buffer_pool_size */ +ATTRIBUTE_COLD void buf_buddy_condense_free(size_t size) noexcept; diff -Nru mariadb-10.11.11/storage/innobase/include/buf0buf.h mariadb-10.11.13/storage/innobase/include/buf0buf.h --- mariadb-10.11.11/storage/innobase/include/buf0buf.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/buf0buf.h 2025-05-19 16:14:25.000000000 +0000 @@ -35,13 +35,16 @@ #include "assume_aligned.h" #include "buf0types.h" #ifndef UNIV_INNOCHECKSUM -#include "ut0byte.h" #include "page0types.h" #include "log0log.h" #include "srv0srv.h" #include "transactional_lock_guard.h" #include +/** The allocation granularity of innodb_buffer_pool_size */ +constexpr size_t innodb_buffer_pool_extent_size= + sizeof(size_t) < 8 ? 2 << 20 : 8 << 20; + /** @name Modes for buf_page_get_gen */ /* @{ */ #define BUF_GET 10 /*!< get always */ @@ -71,7 +74,7 @@ ulint pool_size; /*!< Buffer Pool size in pages */ ulint lru_len; /*!< Length of buf_pool.LRU */ ulint old_lru_len; /*!< buf_pool.LRU_old_len */ - ulint free_list_len; /*!< Length of buf_pool.free list */ + ulint free_list_len; /*!< free + lazy_allocate_size() */ ulint flush_list_len; /*!< Length of buf_pool.flush_list */ ulint n_pend_unzip; /*!< buf_pool.n_pend_unzip, pages pending decompress */ @@ -142,10 +145,8 @@ const page_id_t page_id); #ifndef UNIV_INNOCHECKSUM -# define buf_pool_get_curr_size() srv_buf_pool_curr_size # define buf_block_free(block) buf_pool.free_block(block) - -#define buf_page_get(ID, SIZE, LA, MTR) \ +# define buf_page_get(ID, SIZE, LA, MTR) \ buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, MTR) /** Try to buffer-fix a page. @@ -395,9 +396,6 @@ buf_print_io( /*=========*/ FILE* file); /*!< in: file where to print */ -/** Collect buffer pool metadata. -@param[out] pool_info buffer pool metadata */ -void buf_stats_get_pool_info(buf_pool_info_t *pool_info) noexcept; /** Refresh the statistics used to print per-second averages. */ void buf_refresh_io_stats() noexcept; @@ -427,12 +425,6 @@ ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read) noexcept; -/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, -if needed. -@param[in] size size in bytes -@return aligned size */ -ulint buf_pool_size_align(ulint size) noexcept; - /** Verify that post encryption checksum match with the calculated checksum. This function should be called only if tablespace contains crypt data metadata. 
@param page page frame @@ -549,7 +541,7 @@ /** buf_pool.LRU status mask in state() */ static constexpr uint32_t LRU_MASK= 7U << 29; - /** lock covering the contents of frame */ + /** lock covering the contents of frame() */ block_lock lock; /** pointer to aligned, uncompressed page frame of innodb_page_size */ byte *frame; @@ -559,8 +551,6 @@ !frame && !zip.data means an active buf_pool.watch */ page_zip_des_t zip; #ifdef UNIV_DEBUG - /** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */ - bool in_zip_hash; /** whether this->LRU is in buf_pool.LRU (in_file()); protected by buf_pool.mutex */ bool in_LRU_list; @@ -574,7 +564,7 @@ /** list member in one of the lists of buf_pool; protected by buf_pool.mutex or buf_pool.flush_list_mutex - state() == NOT_USED: buf_pool.free or buf_pool.withdraw + state() == NOT_USED: buf_pool.free in_file() && oldest_modification(): buf_pool.flush_list (protected by buf_pool.flush_list_mutex) @@ -615,7 +605,7 @@ lock() /* not copied */, frame(b.frame), zip(b.zip), #ifdef UNIV_DEBUG - in_zip_hash(b.in_zip_hash), in_LRU_list(b.in_LRU_list), + in_LRU_list(b.in_LRU_list), in_page_hash(b.in_page_hash), in_free_list(b.in_free_list), #endif /* UNIV_DEBUG */ list(b.list), LRU(b.LRU), old(b.old), freed_page_clock(b.freed_page_clock), @@ -632,7 +622,6 @@ id_= id; zip.fix= state; oldest_modification_= 0; - ut_d(in_zip_hash= false); ut_d(in_free_list= false); ut_d(in_LRU_list= false); ut_d(in_page_hash= false); @@ -891,10 +880,6 @@ buf_pool.page_hash can point to buf_page_t or buf_block_t */ #ifdef UNIV_DEBUG - /** whether page.list is in buf_pool.withdraw - ((state() == NOT_USED)) and the buffer pool is being shrunk; - protected by buf_pool.mutex */ - bool in_withdraw_list; /** whether unzip_LRU is in buf_pool.unzip_LRU (in_file() && frame && zip.data); protected by buf_pool.mutex */ @@ -1022,15 +1007,10 @@ @param state initial state() */ void initialise(const page_id_t page_id, ulint zip_size, uint32_t state) noexcept; -}; -/**********************************************************************//** -Compute the hash fold value for blocks in buf_pool.zip_hash. */ -/* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift) -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->page.frame) -#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) -/* @} */ + /** Calculate the page frame address */ + IF_DBUG(,inline) byte *frame_address() const noexcept; +}; /** A "Hazard Pointer" class used to iterate over buf_pool.LRU or buf_pool.flush_list. A hazard pointer is a buf_page_t pointer @@ -1198,59 +1178,66 @@ /** The buffer pool */ class buf_pool_t { - /** A chunk of buffers */ - struct chunk_t - { - /** number of elements in blocks[] */ - size_t size; - /** memory allocated for the page frames */ - unsigned char *mem; - /** descriptor of mem */ - ut_new_pfx_t mem_pfx; - /** array of buffer control blocks */ - buf_block_t *blocks; - - /** Map of first page frame address to chunks[] */ - using map= std::map<const byte*, chunk_t*, std::less<const byte*>, - ut_allocator<std::pair<const byte* const,chunk_t*>>>; - /** Chunk map that may be under construction by buf_resize_thread() */ - static map *map_reg; - /** Current chunk map for lookup only */ - static map *map_ref; - - /** @return the memory size bytes. */ - size_t mem_size() const noexcept { return mem_pfx.m_size; } - - /** Register the chunk */ - void reg() noexcept - { map_reg->emplace(map::value_type(blocks->page.frame, this)); } - - /** Allocate a chunk of buffer frames.
- @param bytes requested size - @return whether the allocation succeeded */ - inline bool create(size_t bytes) noexcept; + /** arrays of buf_block_t followed by page frames; + aligned to and repeating every innodb_buffer_pool_extent_size; + each extent comprises pages_in_extent[] blocks */ + alignas(CPU_LEVEL1_DCACHE_LINESIZE) char *memory; + /** the allocation of the above memory, possibly including some + alignment loss at the beginning */ + char *memory_unaligned; + /** the virtual address range size of memory_unaligned */ + size_t size_unaligned; +#ifdef UNIV_PFS_MEMORY + /** the "owner thread" of the buffer pool allocation */ + PSI_thread *owner; +#endif + /** initialized number of block descriptors */ + size_t n_blocks; + /** number of blocks that need to be freed in shrink() */ + size_t n_blocks_to_withdraw; + /** first block to withdraw in shrink() */ + const buf_page_t *first_to_withdraw; -#ifdef UNIV_DEBUG - /** Find a block that points to a ROW_FORMAT=COMPRESSED page - @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame - @return the block - @retval nullptr if not found */ - const buf_block_t *contains_zip(const void *data) const noexcept - { - const buf_block_t *block= blocks; - for (auto i= size; i--; block++) - if (block->page.zip.data == data) - return block; - return nullptr; - } + /** amount of memory allocated to the buffer pool and descriptors; + protected by mutex */ + Atomic_relaxed<size_t> size_in_bytes; - /** Check that all blocks are in a replaceable state. - @return address of a non-free block - @retval nullptr if all freed */ - inline const buf_block_t *not_freed() const noexcept; -#endif /* UNIV_DEBUG */ - }; public: + /** The requested innodb_buffer_pool_size */ + size_t size_in_bytes_requested; +#if defined __linux__ || !defined DBUG_OFF + /** The minimum allowed innodb_buffer_pool_size in garbage_collect() */ + size_t size_in_bytes_auto_min; +#endif + /** The maximum allowed innodb_buffer_pool_size */ + size_t size_in_bytes_max; + + /** @return the current size of the buffer pool, in bytes */ + size_t curr_pool_size() const noexcept { return size_in_bytes; } + + /** @return the current size of the buffer pool, in pages */ + TPOOL_SUPPRESS_TSAN size_t curr_size() const noexcept { return n_blocks; } + /** @return the maximum usable size of the buffer pool, in pages */ + TPOOL_SUPPRESS_TSAN size_t usable_size() const noexcept + { return n_blocks - n_blocks_to_withdraw - UT_LIST_GET_LEN(withdrawn); } + + /** Determine the used size of the buffer pool in bytes. + @param n_blocks size of the buffer pool in blocks + @return the size needed for n_blocks in bytes, for innodb_page_size */ + static size_t blocks_in_bytes(size_t n_blocks) noexcept; + +#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) + /** Enable buffers to be dumped to core files. + + A convenience function, not called anywhere directly; however, + it is left available for gdb or any debugger to call + in the event that you want all of the memory to be dumped + to a core file. + + @return number of errors found in madvise() calls */ + static int madvise_do_dump() noexcept; +#endif + /** Hash cell chain in page_hash_table */ struct hash_chain { @@ -1258,106 +1245,58 @@ buf_page_t *first; }; private: - /** Withdraw blocks from the buffer pool until meeting withdraw_target. - @return whether retry is needed */ - inline bool withdraw_blocks() noexcept; - - /** Determine if a pointer belongs to a buf_block_t. It can be a pointer to - the buf_block_t itself or a member of it.
- @param ptr a pointer that will not be dereferenced - @return whether the ptr belongs to a buf_block_t struct */ - bool is_block_field(const void *ptr) const noexcept - { - const chunk_t *chunk= chunks; - const chunk_t *const echunk= chunk + ut_min(n_chunks, n_chunks_new); - - /* TODO: protect chunks with a mutex (the older pointer will - currently remain during resize()) */ - for (; chunk < echunk; chunk++) - if (ptr >= reinterpret_cast<const void*>(chunk->blocks) && - ptr < reinterpret_cast<const void*>(chunk->blocks + chunk->size)) - return true; - return false; - } - - /** Try to reallocate a control block. - @param block control block to reallocate - @return whether the reallocation succeeded */ - inline bool realloc(buf_block_t *block) noexcept; + /** Determine the number of blocks in a buffer pool of a particular size. + @param size_in_bytes innodb_buffer_pool_size in bytes + @return number of buffer pool pages */ + static size_t get_n_blocks(size_t size_in_bytes) noexcept; + + /** The outcome of shrink() */ + enum shrink_status{SHRINK_DONE= -1, SHRINK_IN_PROGRESS= 0, SHRINK_ABORT}; + + /** Attempt to shrink the buffer pool. + @param size requested innodb_buffer_pool_size in bytes + @return whether the shrinking was completed */ + ATTRIBUTE_COLD shrink_status shrink(size_t size) noexcept; + + /** Finish shrinking the buffer pool. + @param size the new innodb_buffer_pool_size in bytes + @param reduced how much the innodb_buffer_pool_size was reduced */ + inline void shrunk(size_t size, size_t reduced) noexcept; public: - bool is_initialised() const noexcept { return chunks != nullptr; } + bool is_initialised() const noexcept { return memory != nullptr; } /** Create the buffer pool. @return whether the creation failed */ - bool create(); + bool create() noexcept; /** Clean up after successful create() */ void close() noexcept; - /** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */ - inline void resize(); + /** Resize the buffer pool. + @param size requested innodb_buffer_pool_size in bytes + @param thd current connection */ + ATTRIBUTE_COLD void resize(size_t size, THD *thd) noexcept; -#ifdef __linux__ /** Collect garbage (release pages from the LRU list) */ - inline void garbage_collect(); -#endif - - /** @return whether resize() is in progress */ - bool resize_in_progress() const noexcept - { - return UNIV_UNLIKELY(resizing.load(std::memory_order_relaxed)); - } - - /** @return the current size in blocks */ - size_t get_n_pages() const noexcept - { - ut_ad(is_initialised()); - size_t size= 0; - for (auto j= ut_min(n_chunks_new, n_chunks); j--; ) - size+= chunks[j].size; - return size; - } + inline void garbage_collect() noexcept; - /** Determine whether a frame is intended to be withdrawn during resize(). + /** Determine whether a frame needs to be withdrawn during resize.
@param ptr pointer within a buf_page_t::frame + @param size size_in_bytes_requested @return whether the frame will be withdrawn */ - bool will_be_withdrawn(const byte *ptr) const noexcept + bool will_be_withdrawn(const byte *ptr, size_t size) const noexcept { - ut_ad(n_chunks_new < n_chunks); -#ifdef SAFE_MUTEX - if (resize_in_progress()) - mysql_mutex_assert_owner(&mutex); -#endif /* SAFE_MUTEX */ - - for (const chunk_t *chunk= chunks + n_chunks_new, - * const echunk= chunks + n_chunks; - chunk != echunk; chunk++) - if (ptr >= chunk->blocks->page.frame && - ptr < (chunk->blocks + chunk->size - 1)->page.frame + srv_page_size) - return true; - return false; + const char *p= reinterpret_cast<const char*>(ptr); + ut_ad(!p || p >= memory); + ut_ad(p < memory + size_in_bytes_max); + return p >= memory + size; } - /** Determine whether a block is intended to be withdrawn during resize(). + /** Withdraw a block if needed in case resize() is shrinking. @param bpage buffer pool block - @return whether the frame will be withdrawn */ - bool will_be_withdrawn(const buf_page_t &bpage) const noexcept - { - ut_ad(n_chunks_new < n_chunks); -#ifdef SAFE_MUTEX - if (resize_in_progress()) - mysql_mutex_assert_owner(&mutex); -#endif /* SAFE_MUTEX */ - - for (const chunk_t *chunk= chunks + n_chunks_new, - * const echunk= chunks + n_chunks; - chunk != echunk; chunk++) - if (&bpage >= &chunk->blocks->page && - &bpage < &chunk->blocks[chunk->size].page) - return true; - return false; - } + @return whether the block was withdrawn */ + ATTRIBUTE_COLD bool withdraw(buf_page_t &bpage) noexcept; /** Release and evict a corrupted page. @param bpage x-latched page that was found corrupted @@ -1371,31 +1310,18 @@ #ifdef UNIV_DEBUG /** Find a block that points to a ROW_FORMAT=COMPRESSED page @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame + @param shift number of least significant address bits to ignore @return the block @retval nullptr if not found */ - const buf_block_t *contains_zip(const void *data) const noexcept - { - mysql_mutex_assert_owner(&mutex); - for (const chunk_t *chunk= chunks, * const end= chunks + n_chunks; - chunk != end; chunk++) - if (const buf_block_t *block= chunk->contains_zip(data)) - return block; - return nullptr; - } - + const buf_block_t *contains_zip(const void *data, size_t shift= 0) + const noexcept; /** Assert that all buffer pool pages are in a replaceable state */ void assert_all_freed() noexcept; #endif /* UNIV_DEBUG */ #ifdef BTR_CUR_HASH_ADAPT /** Clear the adaptive hash index on all pages in the buffer pool. */ - inline void clear_hash_index() noexcept; - - /** Get a buffer block from an adaptive hash index pointer. - This function does not return if the block is not identified. - @param ptr pointer to within a page frame - @return pointer to block, never NULL */ - inline buf_block_t *block_from_ahi(const byte *ptr) const noexcept; + void clear_hash_index() noexcept; #endif /* BTR_CUR_HASH_ADAPT */ /** @@ -1418,13 +1344,27 @@ return empty_lsn; } - /** Determine if a buffer block was created by chunk_t::create(). - @param block block descriptor (not dereferenced) - @return whether block has been created by chunk_t::create() */ - bool is_uncompressed(const buf_block_t *block) const noexcept + /** Look up the block descriptor for a page frame address. + @param ptr address within a valid page frame + @return the corresponding block descriptor */ + static buf_block_t *block_from(const void *ptr) noexcept; + + /** Access a block while holding the buffer pool mutex.
+ @param pos position between 0 and get_n_pages() + @return the block descriptor */ + buf_block_t *get_nth_page(size_t pos) const noexcept; + +#ifdef UNIV_DEBUG + /** Determine if an object is within the curr_pool_size() + and associated with an uncompressed page. + @param ptr memory object (not dereferenced) + @return whether the object is valid in the current buffer pool */ + bool is_uncompressed_current(const void *ptr) const noexcept { - return is_block_field(reinterpret_cast<const void*>(block)); + const ptrdiff_t d= static_cast<const char*>(ptr) - memory; + return d >= 0 && size_t(d) < curr_pool_size(); } +#endif public: /** page_fix() mode of operation */ @@ -1456,6 +1396,16 @@ buf_block_t *page_fix(const page_id_t id) noexcept { return page_fix(id, nullptr, FIX_WAIT_READ); } + /** Validate a block descriptor. + @param b block descriptor that may be invalid after shrink() + @param latch page_hash latch for id + @param id page identifier + @return b->page.fix() if b->page.id() == id + @retval 0 if b is invalid */ + TRANSACTIONAL_TARGET + uint32_t page_guess(buf_block_t *b, page_hash_latch &latch, + const page_id_t id) noexcept; + /** Decompress a page and relocate the block descriptor @param b buffer-fixed compressed-only ROW_FORMAT=COMPRESSED page @param chain hash table chain for b->id().fold() @@ -1477,7 +1427,6 @@ buf_page_t *bpage= page_hash.get(page_id, chain); if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)]) { - ut_ad(!bpage->in_zip_hash); ut_ad(!bpage->zip.data); if (!allow_watch) bpage= nullptr; @@ -1498,7 +1447,6 @@ ut_ad(bpage.in_file()); if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)]) return false; - ut_ad(!bpage.in_zip_hash); ut_ad(!bpage.zip.data); return true; } @@ -1539,23 +1487,30 @@ inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain) noexcept; /** @return whether less than 1/4 of the buffer pool is available */ - TPOOL_SUPPRESS_TSAN - bool running_out() const noexcept - { - return !recv_recovery_is_on() && - UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < - (n_chunks_new * chunks->size) / 4; - } + bool running_out() const noexcept; /** @return whether the buffer pool is running low */ bool need_LRU_eviction() const noexcept; - /** @return whether the buffer pool is shrinking */ - inline bool is_shrinking() const noexcept + /** @return number of blocks resize() needs to evict from the buffer pool */ + size_t is_shrinking() const noexcept + { + mysql_mutex_assert_owner(&mutex); + return n_blocks_to_withdraw + UT_LIST_GET_LEN(withdrawn); + } + + /** @return number of blocks in resize() waiting to be withdrawn */ + size_t to_withdraw() const noexcept { - return n_chunks_new < n_chunks; + mysql_mutex_assert_owner(&mutex); + return n_blocks_to_withdraw; } + /** @return the shrinking size of the buffer pool, in bytes + @retval 0 if resize() is not shrinking the buffer pool */ + size_t shrinking_size() const noexcept + { return is_shrinking() ? size_in_bytes_requested : 0; } + #ifdef UNIV_DEBUG /** Validate the buffer pool.
*/ void validate() noexcept; @@ -1572,7 +1527,6 @@ mysql_mutex_assert_owner(&mutex); ut_ad(bpage->in_LRU_list); ut_ad(bpage->in_page_hash); - ut_ad(!bpage->in_zip_hash); ut_ad(bpage->in_file()); lru_hp.adjust(bpage); lru_scan_itr.adjust(bpage); @@ -1592,26 +1546,8 @@ /** @name General fields */ /* @{ */ - ulint curr_pool_size; /*!< Current pool size in bytes */ ulint LRU_old_ratio; /*!< Reserve this much of the buffer pool for "old" blocks */ -#ifdef UNIV_DEBUG - ulint buddy_n_frames; /*!< Number of frames allocated from - the buffer pool to the buddy system */ - ulint mutex_exit_forbidden; /*!< Forbid release mutex */ -#endif - ut_allocator<unsigned char> allocator; /*!< Allocator used for - allocating memory for the the "chunks" - member. */ - ulint n_chunks; /*!< number of buffer pool chunks */ - ulint n_chunks_new; /*!< new number of buffer pool chunks. - both n_chunks{,new} are protected under - mutex */ - chunk_t* chunks; /*!< buffer pool chunks */ - chunk_t* chunks_old; /*!< old buffer pool chunks to be freed - after resizing buffer pool */ - /** current pool size in pages */ - Atomic_counter<ulint> curr_size; /** read-ahead request size in pages */ Atomic_counter<uint32_t> read_ahead_area; @@ -1723,12 +1659,6 @@ /** Look up a page in a hash bucket chain. */ inline buf_page_t *get(const page_id_t id, const hash_chain &chain) const noexcept; - - /** Exclusively aqcuire all latches */ - inline void write_lock_all() noexcept; - - /** Release all latches */ - inline void write_unlock_all() noexcept; }; /** Buffer pool mutex */ @@ -1745,9 +1675,6 @@ indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */ page_hash_table page_hash; - /** map of block->frame to buf_block_t blocks that belong - to buf_buddy_alloc(); protected by buf_pool.mutex */ - hash_table_t zip_hash; /** number of pending unzip() */ Atomic_counter<ulint> n_pend_unzip; @@ -1878,30 +1805,29 @@ Set whenever the free list grows, along with a broadcast of done_free. Protected by buf_pool.mutex. */ Atomic_relaxed<bool> try_LRU_scan; - /** Whether we have warned to be running out of buffer pool */ - std::atomic_flag LRU_warned; /* @} */ /** @name LRU replacement algorithm fields */ /* @{ */ - UT_LIST_BASE_NODE_T(buf_page_t) free; - /*!< base node of the free - block list */ +private: + /** Whether we have warned to be running out of buffer pool; + only modified by buf_flush_page_cleaner(): + set while holding mutex, cleared while holding flush_list_mutex */ + Atomic_relaxed<bool> LRU_warned; + + /** withdrawn blocks during resize() */ + UT_LIST_BASE_NODE_T(buf_page_t) withdrawn; + +public: + /** list of blocks available for allocate() */ + UT_LIST_BASE_NODE_T(buf_page_t) free; + /** broadcast each time when the free list grows or try_LRU_scan is set; protected by mutex */ pthread_cond_t done_free; - UT_LIST_BASE_NODE_T(buf_page_t) withdraw; - /*!< base node of the withdraw - block list. It is only used during - shrinking buffer pool size, not to - reuse the blocks will be removed */ - - ulint withdraw_target;/*!< target length of withdraw - block list, when withdrawing */ - /** "hazard pointer" used during scan of LRU while doing LRU list batch. Protected by buf_pool_t::mutex. */ LRUHp lru_hp; @@ -1942,10 +1868,22 @@ /** Sentinels to detect if pages are read into the buffer pool while a delete-buffering operation is pending. Protected by mutex. */ buf_page_t watch[innodb_purge_threads_MAX + 1]; + + /** Clear LRU_warned */ + void LRU_warned_clear() noexcept + { + mysql_mutex_assert_owner(&flush_list_mutex); + LRU_warned= false; + } + /** Reserve a buffer.
*/ buf_tmp_buffer_t *io_buf_reserve(bool wait_for_reads) noexcept { return io_buf.reserve(wait_for_reads); } + /** Try to allocate a block. + @return a buffer block + @retval nullptr if no blocks are available */ + buf_block_t *allocate() noexcept; /** Remove a block from flush_list. @param bpage buffer pool page */ void delete_from_flush_list(buf_page_t *bpage) noexcept; @@ -1968,6 +1906,13 @@ /** Issue a warning that we could not free up buffer pool pages. */ ATTRIBUTE_COLD void LRU_warn() noexcept; + /** Print buffer pool flush state information. */ + ATTRIBUTE_COLD void print_flush_info() const noexcept; + + /** Collect buffer pool metadata. + @param pool_info buffer pool metadata */ + void get_info(buf_pool_info_t *pool_info) noexcept; + private: /** Temporary memory for page_compressed and encrypted I/O */ struct io_buf_t @@ -1984,9 +1929,6 @@ /** Reserve a buffer */ buf_tmp_buffer_t *reserve(bool wait_for_reads) noexcept; } io_buf; - - /** whether resize() is in the critical path */ - std::atomic<bool> resizing; }; /** The InnoDB buffer pool */ @@ -2135,24 +2077,6 @@ this->old= old; } -#ifdef UNIV_DEBUG -/** Forbid the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_forbid() do { \ - mysql_mutex_assert_owner(&buf_pool.mutex); \ - buf_pool.mutex_exit_forbidden++; \ -} while (0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow() do { \ - mysql_mutex_assert_owner(&buf_pool.mutex); \ - ut_ad(buf_pool.mutex_exit_forbidden--); \ -} while (0) -#else -/** Forbid the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_forbid() ((void) 0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow() ((void) 0) -#endif - /********************************************************************** Let us list the consistency conditions for different control block states. diff -Nru mariadb-10.11.11/storage/innobase/include/buf0buf.inl mariadb-10.11.13/storage/innobase/include/buf0buf.inl --- mariadb-10.11.11/storage/innobase/include/buf0buf.inl 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/buf0buf.inl 2025-05-19 16:14:25.000000000 +0000 @@ -37,7 +37,7 @@ /* FIXME: bpage->freed_page_clock is 31 bits */ return((buf_pool.freed_page_clock & ((1UL << 31) - 1)) < (bpage->freed_page_clock - + (buf_pool.curr_size + + (buf_pool.curr_size() * (BUF_LRU_OLD_RATIO_DIV - buf_pool.LRU_old_ratio) / (BUF_LRU_OLD_RATIO_DIV * 4)))); } diff -Nru mariadb-10.11.11/storage/innobase/include/buf0dblwr.h mariadb-10.11.13/storage/innobase/include/buf0dblwr.h --- mariadb-10.11.11/storage/innobase/include/buf0dblwr.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/buf0dblwr.h 2025-05-19 16:14:25.000000000 +0000 @@ -159,6 +159,9 @@ my_cond_wait(&cond, &mutex.m_mutex); mysql_mutex_unlock(&mutex); } + + /** Print double write state information.
*/ + ATTRIBUTE_COLD void print_info() const noexcept; }; /** The doublewrite buffer */ diff -Nru mariadb-10.11.11/storage/innobase/include/buf0lru.h mariadb-10.11.13/storage/innobase/include/buf0lru.h --- mariadb-10.11.11/storage/innobase/include/buf0lru.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/buf0lru.h 2025-05-19 16:14:25.000000000 +0000 @@ -55,10 +55,6 @@ @return true if found and freed */ bool buf_LRU_scan_and_free_block(ulint limit= ULINT_UNDEFINED); -/** @return a buffer block from the buf_pool.free list -@retval NULL if the free list is empty */ -buf_block_t* buf_LRU_get_free_only(); - /** Get a block from the buf_pool.free list. If the list is empty, blocks will be moved from the end of buf_pool.LRU to buf_pool.free. diff -Nru mariadb-10.11.11/storage/innobase/include/dict0dict.h mariadb-10.11.13/storage/innobase/include/dict0dict.h --- mariadb-10.11.11/storage/innobase/include/dict0dict.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/dict0dict.h 2025-05-19 16:14:25.000000000 +0000 @@ -146,21 +146,21 @@ MDL_ticket **mdl= nullptr) MY_ATTRIBUTE((warn_unused_result)); -/** Decrement the count of open handles */ -void dict_table_close(dict_table_t *table); - -/** Decrements the count of open handles of a table. -@param[in,out] table table -@param[in] dict_locked whether dict_sys.latch is being held -@param[in] thd thread to release MDL -@param[in] mdl metadata lock or NULL if the thread is a - foreground one. */ -void -dict_table_close( - dict_table_t* table, - bool dict_locked, - THD* thd = NULL, - MDL_ticket* mdl = NULL); +/** Release a metadata lock. +@param thd connection that holds mdl +@param mdl metadata lock, or nullptr */ +void mdl_release(THD *thd, MDL_ticket *mdl) noexcept; + +/** Release a table reference and a metadata lock. +@param table referenced table +@param thd connection that holds mdl +@param mdl metadata lock, or nullptr */ +inline void dict_table_close(dict_table_t* table, THD *thd, MDL_ticket *mdl) + noexcept +{ + table->release(); + mdl_release(thd, mdl); +} /*********************************************************************//** Gets the minimum number of bytes per character. @@ -674,7 +674,7 @@ @return estimated number of rows */ inline uint64_t dict_table_get_n_rows(const dict_table_t *table) { - ut_ad(table->stat_initialized); + ut_ad(table->stat_initialized()); return table->stat_n_rows; } @@ -1657,6 +1657,27 @@ dict_table_have_virtual_index( dict_table_t* table); +/** Helper for opening the InnoDB persistent statistics tables */ +class dict_stats final +{ + MDL_context *mdl_context= nullptr; + MDL_ticket *mdl_table= nullptr, *mdl_index= nullptr; + dict_table_t *table_stats= nullptr, *index_stats= nullptr; + +public: + dict_stats()= default; + + /** Open the statistics tables. + @return whether the operation failed */ + bool open(THD *thd) noexcept; + + /** Close the statistics tables after !open_tables(thd). */ + void close() noexcept; + + dict_table_t *table() const noexcept { return table_stats; } + dict_table_t *index() const noexcept { return index_stats; } +}; + #include "dict0dict.inl" #endif diff -Nru mariadb-10.11.11/storage/innobase/include/dict0dict.inl mariadb-10.11.13/storage/innobase/include/dict0dict.inl --- mariadb-10.11.11/storage/innobase/include/dict0dict.inl 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/dict0dict.inl 2025-05-19 16:14:25.000000000 +0000 @@ -1076,8 +1076,8 @@ /** Acquire the table handle. 
*/ inline void dict_table_t::acquire() { - ut_ad(dict_sys.frozen()); - n_ref_count++; + ut_d(const auto old=) n_ref_count++; + ut_ad(old || dict_sys.frozen()); } /** Release the table handle. diff -Nru mariadb-10.11.11/storage/innobase/include/dict0mem.h mariadb-10.11.13/storage/innobase/include/dict0mem.h --- mariadb-10.11.11/storage/innobase/include/dict0mem.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/dict0mem.h 2025-05-19 16:14:25.000000000 +0000 @@ -1106,15 +1106,12 @@ is indexed from 0 to n_uniq-1); This is used when innodb_stats_method is "nulls_ignored". */ - ulint stat_index_size; + uint32_t stat_index_size; /*!< approximate index size in database pages */ - ulint stat_n_leaf_pages; + uint32_t stat_n_leaf_pages; /*!< approximate number of leaf pages in the index tree */ - bool stats_error_printed; - /*!< has persistent statistics error printed - for this index ? */ /* @} */ /** Statistics for defragmentation, these numbers are estimations and could be very inaccurate at certain times, e.g. right after restart, @@ -2358,63 +2355,32 @@ /** Statistics for query optimization. Mostly protected by dict_sys.latch and stats_mutex_lock(). @{ */ - /** TRUE if statistics have been calculated the first time after - database startup or table creation. */ - unsigned stat_initialized:1; - /** Timestamp of last recalc of the stats. */ time_t stats_last_recalc; - /** The two bits below are set in the 'stat_persistent' member. They - have the following meaning: - 1. _ON=0, _OFF=0, no explicit persistent stats setting for this table, - the value of the global srv_stats_persistent is used to determine - whether the table has persistent stats enabled or not - 2. _ON=0, _OFF=1, persistent stats are explicitly disabled for this - table, regardless of the value of the global srv_stats_persistent - 3. _ON=1, _OFF=0, persistent stats are explicitly enabled for this - table, regardless of the value of the global srv_stats_persistent - 4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */ - #define DICT_STATS_PERSISTENT_ON (1 << 1) - #define DICT_STATS_PERSISTENT_OFF (1 << 2) - - /** Indicates whether the table uses persistent stats or not. See - DICT_STATS_PERSISTENT_ON and DICT_STATS_PERSISTENT_OFF. */ - ib_uint32_t stat_persistent; - - /** The two bits below are set in the 'stats_auto_recalc' member. They - have the following meaning: - 1. _ON=0, _OFF=0, no explicit auto recalc setting for this table, the - value of the global srv_stats_persistent_auto_recalc is used to - determine whether the table has auto recalc enabled or not - 2. _ON=0, _OFF=1, auto recalc is explicitly disabled for this table, - regardless of the value of the global srv_stats_persistent_auto_recalc - 3. _ON=1, _OFF=0, auto recalc is explicitly enabled for this table, - regardless of the value of the global srv_stats_persistent_auto_recalc - 4. _ON=1, _OFF=1, not allowed, we assert if this ever happens. */ - #define DICT_STATS_AUTO_RECALC_ON (1 << 1) - #define DICT_STATS_AUTO_RECALC_OFF (1 << 2) - - /** Indicates whether the table uses automatic recalc for persistent - stats or not. See DICT_STATS_AUTO_RECALC_ON and - DICT_STATS_AUTO_RECALC_OFF. */ - ib_uint32_t stats_auto_recalc; - - /** The number of pages to sample for this table during persistent - stats estimation. If this is 0, then the value of the global - srv_stats_persistent_sample_pages will be used instead. 
*/ - ulint stats_sample_pages; + static constexpr uint32_t STATS_INITIALIZED= 1U; + static constexpr uint32_t STATS_PERSISTENT_ON= 1U << 1; + static constexpr uint32_t STATS_PERSISTENT_OFF= 1U << 2; + static constexpr uint32_t STATS_AUTO_RECALC_ON= 1U << 3; + static constexpr uint32_t STATS_AUTO_RECALC_OFF= 1U << 4; + + /** flags for index cardinality statistics */ + Atomic_relaxed<uint32_t> stat; + /** Approximate clustered index size in database pages. */ + uint32_t stat_clustered_index_size; + /** Approximate size of other indexes in database pages. */ + uint32_t stat_sum_of_other_index_sizes; + + + /** The number of pages to sample for this table during persistent + stats estimation. If this is 0, then the value of the global + srv_stats_persistent_sample_pages will be used instead. */ + uint32_t stats_sample_pages; /** Approximate number of rows in the table. We periodically calculate new estimates. */ ib_uint64_t stat_n_rows; - /** Approximate clustered index size in database pages. */ - ulint stat_clustered_index_size; - - /** Approximate size of other indexes in database pages. */ - ulint stat_sum_of_other_index_sizes; - /** How many rows are modified since last stats recalc. When a row is inserted, updated, or deleted, we add 1 to this number; we calculate new estimates for the table and the indexes if the table has changed @@ -2424,7 +2390,7 @@ ib_uint64_t stat_modified_counter; bool stats_error_printed; - /*!< Has persistent stats error beein + /*!< Has persistent stats error been already printed for this table ? */ /* @} */ @@ -2551,6 +2517,35 @@ /** @return the index for that starts with a specific column */ dict_index_t *get_index(const dict_col_t &col) const; + /** @return whether the statistics are initialized */ + static bool stat_initialized(uint32_t stat) noexcept + { return stat & STATS_INITIALIZED; } + + /** @return whether STATS_PERSISTENT is enabled */ + static bool stats_is_persistent(uint32_t stat) noexcept + { + ut_ad(~(stat & (STATS_PERSISTENT_ON | STATS_PERSISTENT_OFF))); + if (stat & STATS_PERSISTENT_ON) return true; + return !(stat & STATS_PERSISTENT_OFF) && srv_stats_persistent; + } + /** @return whether STATS_AUTO_RECALC is enabled */ + static bool stats_is_auto_recalc(uint32_t stat) noexcept + { + ut_ad(stat_initialized(stat)); + ut_ad(~(stat & (STATS_AUTO_RECALC_ON | STATS_AUTO_RECALC_OFF))); + if (stat & STATS_AUTO_RECALC_ON) return true; + return !(stat & STATS_AUTO_RECALC_OFF) && srv_stats_auto_recalc; + } + + /** @return whether the statistics are initialized */ + bool stat_initialized() const noexcept { return stat_initialized(stat); } + /** @return whether STATS_PERSISTENT is enabled */ + bool stats_is_persistent() const noexcept + { return stats_is_persistent(stat); } + /** @return whether STATS_AUTO_RECALC is enabled */ + bool stats_is_auto_recalc() const noexcept + { return stats_is_auto_recalc(stat); } + + /** Create metadata.
@param name table name @param space tablespace diff -Nru mariadb-10.11.11/storage/innobase/include/dict0stats.h mariadb-10.11.13/storage/innobase/include/dict0stats.h --- mariadb-10.11.11/storage/innobase/include/dict0stats.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/dict0stats.h 2025-05-19 16:14:25.000000000 +0000 @@ -30,84 +30,6 @@ #include "dict0types.h" #include "trx0types.h" -enum dict_stats_upd_option_t { - DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the - statistics using a precise and slow - algo and save them to the persistent - storage, if the persistent storage is - not present then emit a warning and - fall back to transient stats */ - DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics - using an imprecise quick algo - without saving the results - persistently */ - DICT_STATS_EMPTY_TABLE, /* Write all zeros (or 1 where it makes sense) - into a table and its indexes' statistics - members. The resulting stats correspond to an - empty table. If the table is using persistent - statistics, then they are saved on disk. */ - DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* fetch the stats - from the persistent storage if the in-memory - structures have not been initialized yet, - otherwise do nothing */ -}; - -/*********************************************************************//** -Set the persistent statistics flag for a given table. This is set only -in the in-memory table object and is not saved on disk. It will be read -from the .frm file upon first open from MySQL after a server restart. */ -UNIV_INLINE -void -dict_stats_set_persistent( -/*======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool ps_on, /*!< in: persistent stats explicitly enabled */ - ibool ps_off) /*!< in: persistent stats explicitly disabled */ - MY_ATTRIBUTE((nonnull)); - -/** @return whether persistent statistics is enabled for a given table */ -UNIV_INLINE -bool -dict_stats_is_persistent_enabled(const dict_table_t* table) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Set the auto recalc flag for a given table (only honored for a persistent -stats enabled table). The flag is set only in the in-memory table object -and is not saved in InnoDB files. It will be read from the .frm file upon -first open from MySQL after a server restart. */ -UNIV_INLINE -void -dict_stats_auto_recalc_set( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool auto_recalc_on, /*!< in: explicitly enabled */ - ibool auto_recalc_off); /*!< in: explicitly disabled */ - -/** @return whether auto recalc is enabled for a given table*/ -UNIV_INLINE -bool -dict_stats_auto_recalc_is_enabled(const dict_table_t* table) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Initialize table's stats for the first time when opening a table. */ -UNIV_INLINE -void -dict_stats_init( -/*============*/ - dict_table_t* table); /*!< in/out: table */ - -/*********************************************************************//** -Deinitialize table's stats after the last close of the table. This is -used to detect "FLUSH TABLE" and refresh the stats upon next open. 
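With dict_stats_upd_option_t removed, the option-based dict_stats_update() dispatch is replaced by the dedicated functions declared just below. A hedged sketch of how a caller might map the old options onto the new interface; the real callers add error handling and a fallback from persistent to transient statistics:

    // Illustrative only: choose between the dedicated statistics functions
    // that replace dict_stats_update(table, option).
    dberr_t update_stats_sketch(dict_table_t *table)
    {
      if (table->stats_is_persistent() &&
          dict_stats_persistent_storage_check() == SCHEMA_OK)
        return dict_stats_update_persistent(table); // precise; saved to disk
      return dict_stats_update_transient(table);    // quick; in-memory only
    }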
*/ -UNIV_INLINE -void -dict_stats_deinit( -/*==============*/ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((nonnull)); - #ifdef WITH_WSREP /** Update the table modification counter and if necessary, schedule new estimates for table and index statistics to be calculated. @@ -124,19 +46,6 @@ # define dict_stats_update_if_needed(t,trx) dict_stats_update_if_needed_func(t) #endif -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. -@return DB_* error code or DB_SUCCESS */ -dberr_t -dict_stats_update( -/*==============*/ - dict_table_t* table, /*!< in/out: table */ - dict_stats_upd_option_t stats_upd_option); - /*!< in: whether to (re) calc - the stats or to fetch them from - the persistent storage */ - /** Execute DELETE FROM mysql.innodb_table_stats @param database_name database name @param table_name table name @@ -173,6 +82,50 @@ dict_index_t* index) /*!< in/out: index */ MY_ATTRIBUTE((nonnull)); +enum dict_stats_schema_check { + /** The InnoDB persistent statistics tables do not exist. */ + SCHEMA_NOT_EXIST= -1, + /** The schema of the InnoDB persistent statistics tables is valid. */ + SCHEMA_OK= 0, + /** The schema is invalid. */ + SCHEMA_INVALID +}; + +/** @return whether the persistent statistics storage is usable */ +dict_stats_schema_check +dict_stats_persistent_storage_check(bool dict_already_locked= false) noexcept; + +/** Save the persistent statistics of a table or an index. +@param table table whose stats to save +@param only_for_index the index ID to save statistics for (0=all) +@return DB_SUCCESS or error code */ +dberr_t dict_stats_save(dict_table_t* table, index_id_t index_id= 0); + +/** Read the stored persistent statistics of a table. */ +dberr_t dict_stats_fetch_from_ps(dict_table_t *table); + +/** +Calculate new estimates for table and index statistics. This function +is relatively quick and is used to calculate non-persistent statistics. +@param table table for which the non-persistent statistics are being updated +@return error code +@retval DB_SUCCESS_LOCKED_REC if the table is under bulk insert operation */ +dberr_t dict_stats_update_transient(dict_table_t *table) noexcept; + +/** +Calculate new estimates for table and index statistics. This function +is slower than dict_stats_update_transient(). +@param table table for which the persistent statistics are being updated +@return DB_SUCCESS or error code +@retval DB_SUCCESS_LOCKED_REC if the table is under bulk insert operation */ +dberr_t dict_stats_update_persistent(dict_table_t *table) noexcept; +
+/** +Try to calculate and save new estimates for persistent statistics. +If persistent statistics are not enabled for the table or not available, +this does nothing. */ +dberr_t dict_stats_update_persistent_try(dict_table_t *table); + /** Rename a table in InnoDB persistent stats storage. @param old_name old table name @param new_name new table name @@ -229,8 +182,6 @@ dict_stats_report_error(dict_table_t* table, bool defragment = false) MY_ATTRIBUTE((nonnull, warn_unused_result)); -#include "dict0stats.inl" - #ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS void test_dict_stats_all(); #endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */ @@ -244,4 +195,8 @@ dict_stats_empty_table( dict_table_t* table, bool empty_defrag_stats); + +/** Clear the statistics for a table and save them if +persistent statistics are enabled.
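dict_stats_persistent_storage_check() now returns a tri-state result, distinguishing a missing statistics schema from an invalid one. An illustrative three-way handling of the result; actual call sites and diagnostics differ:

    // Hedged sketch, not an actual upstream call site.
    static void choose_stats_source()
    {
      switch (dict_stats_persistent_storage_check()) {
      case SCHEMA_OK:        /* persistent statistics can be loaded and saved */
        break;
      case SCHEMA_NOT_EXIST: /* e.g. mid-upgrade: use transient statistics */
        break;
      case SCHEMA_INVALID:   /* warn and fall back to transient statistics */
        break;
      }
    }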
*/ +void dict_stats_empty_table_and_save(dict_table_t *table); #endif /* dict0stats_h */ diff -Nru mariadb-10.11.11/storage/innobase/include/dict0stats.inl mariadb-10.11.13/storage/innobase/include/dict0stats.inl --- mariadb-10.11.11/storage/innobase/include/dict0stats.inl 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/dict0stats.inl 1970-01-01 00:00:00.000000000 +0000 @@ -1,219 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0stats.ic -Code used for calculating and manipulating table statistics. - -Created Jan 23, 2012 Vasil Dimov -*******************************************************/ - -#include "dict0dict.h" -#include "srv0srv.h" - -/*********************************************************************//** -Set the persistent statistics flag for a given table. This is set only -in the in-memory table object and is not saved on disk. It will be read -from the .frm file upon first open from MySQL after a server restart. */ -UNIV_INLINE -void -dict_stats_set_persistent( -/*======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool ps_on, /*!< in: persistent stats explicitly enabled */ - ibool ps_off) /*!< in: persistent stats explicitly disabled */ -{ - /* Not allowed to have both flags set, but a CREATE or ALTER - statement that contains "STATS_PERSISTENT=0 STATS_PERSISTENT=1" would - end up having both set. In this case we clear the OFF flag. */ - if (ps_on && ps_off) { - ps_off = FALSE; - } - - ib_uint32_t stat_persistent = 0; - - if (ps_on) { - stat_persistent |= DICT_STATS_PERSISTENT_ON; - } - - if (ps_off) { - stat_persistent |= DICT_STATS_PERSISTENT_OFF; - } - - /* we rely on this assignment to be atomic */ - table->stat_persistent = stat_persistent; -} - -/** @return whether persistent statistics is enabled for a given table */ -UNIV_INLINE -bool -dict_stats_is_persistent_enabled(const dict_table_t* table) -{ - /* Because of the nature of this check (non-locking) it is possible - that a table becomes: - * PS-disabled immediately after this function has returned TRUE or - * PS-enabled immediately after this function has returned FALSE. - This means that it is possible that we do: - + dict_stats_update(DICT_STATS_RECALC_PERSISTENT) on a table that has - just been PS-disabled or - + dict_stats_update(DICT_STATS_RECALC_TRANSIENT) on a table that has - just been PS-enabled. - This is acceptable. 
Avoiding this would mean that we would have to - hold dict_sys.latch or stats_mutex_lock() like for accessing the - other ::stat_ members which would be too big performance penalty, - especially when this function is called from - dict_stats_update_if_needed(). */ - - /* we rely on this read to be atomic */ - ib_uint32_t stat_persistent = table->stat_persistent; - - if (stat_persistent & DICT_STATS_PERSISTENT_ON) { - ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF)); - return(true); - } else if (stat_persistent & DICT_STATS_PERSISTENT_OFF) { - return(false); - } else { - return(srv_stats_persistent); - } -} - -/*********************************************************************//** -Set the auto recalc flag for a given table (only honored for a persistent -stats enabled table). The flag is set only in the in-memory table object -and is not saved in InnoDB files. It will be read from the .frm file upon -first open from MySQL after a server restart. */ -UNIV_INLINE -void -dict_stats_auto_recalc_set( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool auto_recalc_on, /*!< in: explicitly enabled */ - ibool auto_recalc_off) /*!< in: explicitly disabled */ -{ - ut_ad(!auto_recalc_on || !auto_recalc_off); - - ib_uint32_t stats_auto_recalc = 0; - - if (auto_recalc_on) { - stats_auto_recalc |= DICT_STATS_AUTO_RECALC_ON; - } - - if (auto_recalc_off) { - stats_auto_recalc |= DICT_STATS_AUTO_RECALC_OFF; - } - - /* we rely on this assignment to be atomic */ - table->stats_auto_recalc = stats_auto_recalc; -} - -/** @return whether auto recalc is enabled for a given table*/ -UNIV_INLINE -bool -dict_stats_auto_recalc_is_enabled(const dict_table_t* table) -{ - /* we rely on this read to be atomic */ - ib_uint32_t stats_auto_recalc = table->stats_auto_recalc; - - if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) { - ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF)); - return(true); - } else if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) { - return(false); - } else { - return(srv_stats_auto_recalc); - } -} - -/*********************************************************************//** -Initialize table's stats for the first time when opening a table. */ -UNIV_INLINE -void -dict_stats_init( -/*============*/ - dict_table_t* table) /*!< in/out: table */ -{ - ut_ad(!table->stats_mutex_is_owner()); - - if (table->stat_initialized) { - return; - } - - dict_stats_upd_option_t opt; - - if (dict_stats_is_persistent_enabled(table)) { - opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; - } else { - opt = DICT_STATS_RECALC_TRANSIENT; - } - - dict_stats_update(table, opt); -} - -/*********************************************************************//** -Deinitialize table's stats after the last close of the table. This is -used to detect "FLUSH TABLE" and refresh the stats upon next open. 
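The inline helpers deleted from dict0stats.inl live on as the flag word shown in dict0mem.h above: the *_ON/*_OFF bit pairs plus the server-wide default collapse into dict_table_t::stats_is_persistent() and stats_is_auto_recalc(). A minimal restatement of the decision logic; the helper below is illustrative, not upstream code:

    #include <cstdint>

    // Mirrors the removed dict_stats_is_persistent_enabled(): an explicit ON
    // bit wins, an explicit OFF bit disables, otherwise the server default.
    inline bool stats_persistent_sketch(uint32_t stat, bool server_default)
    {
      if (stat & dict_table_t::STATS_PERSISTENT_ON)  return true;
      if (stat & dict_table_t::STATS_PERSISTENT_OFF) return false;
      return server_default;    // srv_stats_persistent in the real code
    }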
*/ -UNIV_INLINE -void -dict_stats_deinit( -/*==============*/ - dict_table_t* table) /*!< in/out: table */ -{ - ut_ad(table->stats_mutex_is_owner()); - ut_ad(table->get_ref_count() == 0); - -#ifdef HAVE_valgrind - if (!table->stat_initialized) { - return; - } - - MEM_UNDEFINED(&table->stat_n_rows, sizeof table->stat_n_rows); - MEM_UNDEFINED(&table->stat_clustered_index_size, - sizeof table->stat_clustered_index_size); - MEM_UNDEFINED(&table->stat_sum_of_other_index_sizes, - sizeof table->stat_sum_of_other_index_sizes); - MEM_UNDEFINED(&table->stat_modified_counter, - sizeof table->stat_modified_counter); - - dict_index_t* index; - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - MEM_UNDEFINED( - index->stat_n_diff_key_vals, - index->n_uniq - * sizeof index->stat_n_diff_key_vals[0]); - MEM_UNDEFINED( - index->stat_n_sample_sizes, - index->n_uniq - * sizeof index->stat_n_sample_sizes[0]); - MEM_UNDEFINED( - index->stat_n_non_null_key_vals, - index->n_uniq - * sizeof index->stat_n_non_null_key_vals[0]); - MEM_UNDEFINED( - &index->stat_index_size, - sizeof(index->stat_index_size)); - MEM_UNDEFINED( - &index->stat_n_leaf_pages, - sizeof(index->stat_n_leaf_pages)); - } -#endif /* HAVE_valgrind */ - table->stat_initialized = FALSE; -} diff -Nru mariadb-10.11.11/storage/innobase/include/fil0fil.h mariadb-10.11.13/storage/innobase/include/fil0fil.h --- mariadb-10.11.11/storage/innobase/include/fil0fil.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/fil0fil.h 2025-05-19 16:14:25.000000000 +0000 @@ -351,7 +351,7 @@ /** fil_system.spaces chain node */ fil_space_t *hash= nullptr; /** log_sys.get_lsn() of the most recent fil_names_write_if_was_clean(). - Reset to 0 by fil_names_clear(). Protected by log_sys.mutex. + Reset to 0 by fil_names_clear(). Protected by log_sys.latch_have_wr(). If and only if this is nonzero, the tablespace will be in named_spaces. */ lsn_t max_lsn= 0; /** base node for the chain of data files; multiple entries are @@ -422,7 +422,7 @@ bool being_imported= false; /** Whether any corrupton of this tablespace has been reported */ - mutable std::atomic_flag is_corrupted{false}; + mutable std::atomic_flag is_corrupted= ATOMIC_FLAG_INIT; public: /** mutex to protect freed_ranges and last_freed_lsn */ @@ -1527,7 +1527,10 @@ inline void fil_space_t::reacquire() noexcept { - ut_d(uint32_t n=) n_pending.fetch_add(1, std::memory_order_relaxed); +#ifdef SAFE_MUTEX + uint32_t n= +#endif + n_pending.fetch_add(1, std::memory_order_relaxed); #ifdef SAFE_MUTEX if (mysql_mutex_is_owner(&fil_system.mutex)) return; ut_ad(n & PENDING); diff -Nru mariadb-10.11.11/storage/innobase/include/fsp0fsp.h mariadb-10.11.13/storage/innobase/include/fsp0fsp.h --- mariadb-10.11.11/storage/innobase/include/fsp0fsp.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/fsp0fsp.h 2025-05-19 16:14:25.000000000 +0000 @@ -355,9 +355,9 @@ @param[out] used number of pages that are used (not more than reserved) @param[in,out] mtr mini-transaction @return number of reserved pages */ -ulint fseg_n_reserved_pages(const buf_block_t &block, - const fseg_header_t *header, ulint *used, - mtr_t *mtr) +uint32_t fseg_n_reserved_pages(const buf_block_t &block, + const fseg_header_t *header, uint32_t *used, + mtr_t *mtr) noexcept MY_ATTRIBUTE((nonnull)); /**********************************************************************//** Allocates a single free page from a segment. 
This function implements diff -Nru mariadb-10.11.11/storage/innobase/include/ibuf0ibuf.h mariadb-10.11.13/storage/innobase/include/ibuf0ibuf.h --- mariadb-10.11.11/storage/innobase/include/ibuf0ibuf.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/ibuf0ibuf.h 2025-05-19 16:14:25.000000000 +0000 @@ -62,11 +62,11 @@ /** Insert buffer struct */ struct ibuf_t{ - Atomic_relaxed<ulint> size; /*!< current size of the ibuf index + Atomic_relaxed<uint32_t> size; /*!< current size of the ibuf index tree, in pages */ - Atomic_relaxed<ulint> max_size; /*!< recommended maximum size of the + Atomic_relaxed<uint32_t> max_size;/*!< recommended maximum size of the ibuf index tree, in pages */ - ulint seg_size; /*!< allocated pages of the file + uint32_t seg_size; /*!< allocated pages of the file segment containing ibuf header and tree */ bool empty; /*!< Protected by the page @@ -75,8 +75,8 @@ (FSP_IBUF_TREE_ROOT_PAGE_NO). true if and only if the insert buffer tree is empty. */ - ulint free_list_len; /*!< length of the free list */ - ulint height; /*!< tree height */ + uint8_t height; /*!< tree height */ + uint32_t free_list_len; /*!< length of the free list */ dict_index_t* index; /*!< insert buffer index */ /** number of pages merged */ diff -Nru mariadb-10.11.11/storage/innobase/include/log0log.h mariadb-10.11.13/storage/innobase/include/log0log.h --- mariadb-10.11.11/storage/innobase/include/log0log.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/log0log.h 2025-05-19 16:14:25.000000000 +0000 @@ -64,20 +64,19 @@ /** Write to the log file up to the last log entry. @param durable whether to wait for a durable write to complete */ -void log_buffer_flush_to_disk(bool durable= true); - +void log_buffer_flush_to_disk(bool durable= true) noexcept; /** Prepare to invoke log_write_and_flush(), before acquiring log_sys.latch. */ -ATTRIBUTE_COLD void log_write_and_flush_prepare(); +ATTRIBUTE_COLD void log_write_and_flush_prepare() noexcept; /** Durably write the log up to log_sys.get_lsn(). */ -ATTRIBUTE_COLD void log_write_and_flush(); +ATTRIBUTE_COLD void log_write_and_flush() noexcept; /** Make a checkpoint */ -ATTRIBUTE_COLD void log_make_checkpoint(); +ATTRIBUTE_COLD void log_make_checkpoint() noexcept; /** Make a checkpoint at the latest lsn on shutdown. */ -ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown(); +ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept; /******************************************************//** Prints info of the log.
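[Editor's sketch] The ibuf_t hunk above is a plain size optimization: ulint is 64-bit on LP64 targets, while these counters fit comfortably in 32 bits (8 bits for a B-tree height), and reordering the narrowed fields lets them pack with less padding. A toy illustration of the effect, with an arbitrary subset of fields rather than the real struct:

#include <cstdint>
#include <cstdio>

struct wide     // ulint-style fields: 5 * 8 = 40 bytes on LP64
{
  unsigned long size, max_size, seg_size, free_list_len, height;
};

struct narrow   // narrowed and reordered as in the patch: 20 bytes
{
  uint32_t size, max_size, seg_size;
  uint8_t height;           // a tree hundreds of levels deep cannot occur
  uint32_t free_list_len;
};

int main()
{
  std::printf("%zu vs %zu bytes\n", sizeof(wide), sizeof(narrow));
}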
*/ @@ -167,40 +166,35 @@ static constexpr lsn_t FIRST_LSN= START_OFFSET; private: - /** the lock bit in buf_free */ - static constexpr size_t buf_free_LOCK= ~(~size_t{0} >> 1); + /** the least significant bit of the write_to_buf buffer */ + static constexpr size_t WRITE_TO_BUF_SHIFT{34}; + /** write_lsn_offset component for incrementing write_to_buf */ + static constexpr uint64_t WRITE_TO_BUF{1ULL << WRITE_TO_BUF_SHIFT}; + /** write_lsn_offset flag to indicate that append_prepare_wait() is active */ + static constexpr uint64_t WRITE_BACKOFF{1ULL << 33}; + + /** The current log sequence number, relative to base_lsn, and flags; + may be modified while latch_have_any() */ alignas(CPU_LEVEL1_DCACHE_LINESIZE) - /** first free offset within buf used; - the most significant bit is set by lock_lsn() to protect this field - as well as write_to_buf, waits */ - std::atomic buf_free; -public: - /** number of write requests (to buf); protected by lock_lsn() or lsn_lock */ - size_t write_to_buf; - /** log record buffer, written to by mtr_t::commit() */ - byte *buf; -private: - /** The log sequence number of the last change of durable InnoDB files; - protected by lock_lsn() or lsn_lock or latch.wr_lock() */ - std::atomic lsn; + Atomic_relaxed write_lsn_offset; + /** the LSN of the last write_buf() or persist(); protected by latch */ + std::atomic base_lsn; /** the first guaranteed-durable log sequence number */ std::atomic flushed_to_disk_lsn; public: - /** number of append_prepare_wait(); protected by lock_lsn() or lsn_lock */ - size_t waits; - /** innodb_log_buffer_size (size of buf,flush_buf if !is_mmap(), in bytes) */ + /** innodb_log_buffer_size (usable append_prepare() size in bytes) */ unsigned buf_size; /** log file size in bytes, including the header */ lsn_t file_size; #ifdef LOG_LATCH_DEBUG typedef srw_lock_debug log_rwlock; - typedef srw_mutex log_lsn_lock; bool latch_have_wr() const { return latch.have_wr(); } bool latch_have_rd() const { return latch.have_rd(); } bool latch_have_any() const { return latch.have_any(); } #else + typedef srw_lock log_rwlock; # ifndef UNIV_DEBUG # elif defined SUX_LOCK_GENERIC bool latch_have_wr() const { return true; } @@ -211,23 +205,23 @@ bool latch_have_rd() const { return latch.is_locked(); } bool latch_have_any() const { return latch.is_locked(); } # endif -# ifdef __aarch64__ - /* On ARM, we spin more */ - typedef srw_spin_lock log_rwlock; - typedef pthread_mutex_wrapper log_lsn_lock; -# else - typedef srw_lock log_rwlock; - typedef srw_mutex log_lsn_lock; -# endif #endif - /** exclusive latch for checkpoint, shared for mtr_t::commit() to buf */ - alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock latch; + /** latch_have_wr() for checkpoint, latch_have_any() for append_prepare() */ + log_rwlock latch; + + /** log record buffer, written to by mtr_t::commit() */ + alignas(CPU_LEVEL1_DCACHE_LINESIZE) byte *buf; + + /** number of write requests to buf, + excluding (write_lsn_offset & WRITE_TO_BUF); + protected by latch.wr_lock() */ + size_t write_to_buf; /** number of writes from buf or flush_buf to log; protected by latch.wr_lock() */ - ulint write_to_log; + size_t write_to_log; - /** Last written LSN */ + /** Last written LSN; protected by latch */ lsn_t write_lsn; /** Buffer for writing data to ib_logfile0, or nullptr if is_mmap(). 
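[Editor's sketch] The replacement of buf_free and lsn above packs three things into the single 64-bit write_lsn_offset: the byte offset since base_lsn in the low 33 bits, a back-off flag in bit 33, and a count of buffer writes in the bits from 34 up. That is what makes the new get_lsn_approx() possible: one acquire load plus one relaxed load, with the writer's release store guaranteeing that the sum can only under-estimate. A self-contained sketch of the layout and both sides of the synchronization (the constants match the patch; everything else is simplified):

#include <atomic>
#include <cstdint>

using lsn_t= uint64_t;

constexpr unsigned WRITE_TO_BUF_SHIFT= 34;
constexpr uint64_t WRITE_TO_BUF= 1ULL << WRITE_TO_BUF_SHIFT;
constexpr uint64_t WRITE_BACKOFF= 1ULL << 33;

std::atomic<uint64_t> write_lsn_offset{0}; // offset | flag | counter
std::atomic<lsn_t> base_lsn{8192};         // advanced by the log writer

// Writer side, as in write_buf()/persist(): zero the offset first, then
// publish the new base with release, so a reader never double-counts.
void writer_advance(lsn_t new_base)
{
  write_lsn_offset.store(0, std::memory_order_relaxed);
  base_lsn.store(new_base, std::memory_order_release);
}

// Reader side: a lower bound on the current LSN, with no latch taken.
lsn_t get_lsn_approx()
{
  lsn_t base= base_lsn.load(std::memory_order_acquire);
  uint64_t off= write_lsn_offset.load(std::memory_order_relaxed);
  return base + (off & (WRITE_BACKOFF - 1)); // mask flag and counter bits
}

int main()
{
  write_lsn_offset.fetch_add(100 | WRITE_TO_BUF); // append 100 bytes
  return get_lsn_approx() == 8292 ? 0 : 1;
}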
@@ -241,8 +235,6 @@ Atomic_relaxed checkpoint_pending; /** next checkpoint number (protected by latch.wr_lock()) */ byte next_checkpoint_no; - /** recommended maximum buf_free size, after which the buffer is flushed */ - unsigned max_buf_free; /** Log sequence number when a log file overwrite (broken crash recovery) was noticed. Protected by latch.wr_lock(). */ lsn_t overwrite_warned; @@ -266,12 +258,6 @@ /** Buffer for writing to resize_log; @see flush_buf */ byte *resize_flush_buf; - /** Special implementation of lock_lsn() for IA-32 and AMD64 */ - void lsn_lock_bts() noexcept; - /** Acquire a lock for updating buf_free and related fields. - @return the value of buf_free */ - size_t lock_lsn() noexcept; - /** log sequence number when log resizing was initiated; 0 if the log is not being resized, 1 if resize_start() is in progress */ std::atomic resize_lsn; @@ -303,7 +289,6 @@ bool log_maybe_unbuffered; # endif #endif - /** Fields involved in checkpoints @{ */ lsn_t log_capacity; /*!< capacity of the log; if the checkpoint age exceeds this, it is @@ -326,34 +311,26 @@ /* @} */ private: - /** A lock when the spin-only lock_lsn() is not being used */ - log_lsn_lock lsn_lock; + /** the thread that initiated resize_lsn() */ + Atomic_relaxed resize_initiator; +#ifdef HAVE_PMEM + /** mutex protecting wrap-around in resize_write() */ + srw_mutex resize_wrap_mutex; +#endif public: + /** number of long append_prepare_wait(); protected by latch_have_wr() */ + size_t waits; - bool is_initialised() const noexcept { return max_buf_free != 0; } - - /** whether there is capacity in the log buffer */ - bool buf_free_ok() const noexcept - { - ut_ad(!is_mmap()); - return (buf_free.load(std::memory_order_relaxed) & ~buf_free_LOCK) < - max_buf_free; - } - + bool is_initialised() const noexcept + { return base_lsn.load(std::memory_order_relaxed) != 0; } inline void set_recovered() noexcept; - void set_buf_free(size_t f) noexcept - { ut_ad(f < buf_free_LOCK); buf_free.store(f, std::memory_order_relaxed); } - bool is_mmap() const noexcept { return !flush_buf; } /** @return whether a handle to the log is open; is_mmap() && !is_opened() holds for PMEM */ bool is_opened() const noexcept { return log.is_opened(); } - /** @return target write LSN to react on !buf_free_ok() */ - inline lsn_t get_write_target() const; - /** @return LSN at which log resizing was started and is still in progress @retval 0 if no log resizing is in progress @retval 1 if resize_start() is in progress */ @@ -367,11 +344,17 @@ /** Start resizing the log and release the exclusive latch. @param size requested new file_size + @param thd the current thread identifier @return whether the resizing was started successfully */ - resize_start_status resize_start(os_offset_t size) noexcept; + resize_start_status resize_start(os_offset_t size, void *thd) noexcept; - /** Abort any resize_start(). */ - void resize_abort() noexcept; + /** Abort a resize_start() that we started. + @param thd thread identifier that had been passed to resize_start() */ + void resize_abort(void *thd) noexcept; + + /** @return whether a particular resize_start() is in progress */ + bool resize_running(void *thd) const noexcept + { return thd == resize_initiator; } /** Replicate a write to the log. @param lsn start LSN @@ -400,53 +383,64 @@ { return resize_buf + resize_target; } /** Initialise the redo log subsystem. */ - void create(); + void create() noexcept; /** Attach a log file. 
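[Editor's sketch] resize_start() now records which thread initiated the resize, and resize_abort() acts only for that same caller, so one connection can no longer abort a log resize started by another. In the patch the handover happens under log_sys.latch; the ownership rule itself can be sketched lock-free with a CAS (all names hypothetical):

#include <atomic>

std::atomic<void*> resize_initiator{nullptr}; // stand-in for the member

bool resize_start(void *thd)
{
  void *expected= nullptr;        // succeed only if no resize is running
  return resize_initiator.compare_exchange_strong(expected, thd);
}

void resize_abort(void *thd)
{
  void *expected= thd;            // only the initiator may abort its own
  resize_initiator.compare_exchange_strong(expected, nullptr);
}

int main()
{
  int a, b;
  bool ok= resize_start(&a);      // a owns the resize
  bool denied= resize_start(&b);  // b is refused
  resize_abort(&b);               // no-op: b is not the initiator
  resize_abort(&a);               // a releases ownership
  return ok && !denied ? 0 : 1;
}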
@return whether the memory allocation succeeded */ - bool attach(log_file_t file, os_offset_t size); + bool attach(log_file_t file, os_offset_t size) noexcept; /** Disable memory-mapped access (update log_mmap) */ - void clear_mmap(); - void close_file(bool really_close= true); + void clear_mmap() noexcept; + void close_file(bool really_close= true) noexcept; #if defined __linux__ || defined _WIN32 /** Try to enable or disable file system caching (update log_buffered) */ - void set_buffered(bool buffered); + void set_buffered(bool buffered) noexcept; #endif /** Calculate the checkpoint safety margins. */ - static void set_capacity(); + static void set_capacity() noexcept; /** Write a log file header. @param buf log header buffer @param lsn log sequence number corresponding to log_sys.START_OFFSET @param encrypted whether the log is encrypted */ - static void header_write(byte *buf, lsn_t lsn, bool encrypted); + static void header_write(byte *buf, lsn_t lsn, bool encrypted) noexcept; - lsn_t get_lsn(std::memory_order order= std::memory_order_relaxed) const - { return lsn.load(order); } + /** @return a lower bound estimate of get_lsn(), + using acquire-release ordering with write_buf() or persist(); + this is exact unless append_prepare_wait() is pending */ + lsn_t get_lsn_approx() const noexcept + { + /* acquire-release ordering with write_buf() and persist() */ + lsn_t lsn= base_lsn.load(std::memory_order_acquire); + lsn += write_lsn_offset.load(std::memory_order_relaxed) & + (WRITE_BACKOFF - 1); + return lsn; + } + + /** @return the current log sequence number (logical time stamp) */ + lsn_t get_lsn() const noexcept + { + ut_ad(latch_have_wr()); + return base_lsn.load(std::memory_order_relaxed) + + (write_lsn_offset & (WRITE_BACKOFF - 1)); + } lsn_t get_flushed_lsn(std::memory_order order= std::memory_order_acquire) const noexcept { return flushed_to_disk_lsn.load(order); } /** Initialize the LSN on initial log file creation. */ - lsn_t init_lsn() noexcept - { - latch.wr_lock(SRW_LOCK_CALL); - const lsn_t lsn{get_lsn()}; - flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); - write_lsn= lsn; - latch.wr_unlock(); - return lsn; - } + inline lsn_t init_lsn() noexcept; void set_recovered_lsn(lsn_t lsn) noexcept { ut_ad(latch_have_wr()); - write_lsn= lsn; - this->lsn.store(lsn, std::memory_order_relaxed); + uint64_t lsn_offset= ((write_size - 1) & (lsn - first_lsn)); + write_lsn_offset= lsn_offset; + base_lsn.store(lsn - lsn_offset, std::memory_order_relaxed); flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); + write_lsn= lsn; } #ifdef HAVE_PMEM @@ -481,25 +475,19 @@ private: /** Update writer and mtr_t::finisher */ - void writer_update() noexcept; + void writer_update(bool resizing) noexcept; /** Wait in append_prepare() for buffer to become available - @tparam spin whether to use the spin-only lock_lsn() - @param b the value of buf_free - @param ex whether log_sys.latch is exclusively locked - @param lsn log sequence number to write up to - @return the new value of buf_free */ - template - ATTRIBUTE_COLD size_t append_prepare_wait(size_t b, bool ex, lsn_t lsn) - noexcept; + @param late whether the WRITE_BACKOFF flag had already been set + @param ex whether log_sys.latch is exclusively locked */ + ATTRIBUTE_COLD void append_prepare_wait(bool late, bool ex) noexcept; public: /** Reserve space in the log buffer for appending data. 
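[Editor's sketch] With the buffer position and the LSN advancing in lock step, a single fetch_add on write_lsn_offset can hand out both the byte range and the LSN range, which is why lock_lsn()/lsn_lock could be removed. A greatly simplified sketch of the reservation; unlike this toy, the real append_prepare_wait() never undoes a reservation but raises the WRITE_BACKOFF flag and waits:

#include <atomic>
#include <cstdint>
#include <thread>

using lsn_t= uint64_t;
constexpr size_t buf_size= 1U << 20;

std::atomic<uint64_t> reserved{0};  // offset into the log buffer
lsn_t base_lsn= 8192;               // held constant here for brevity

lsn_t append_prepare(size_t size)
{
  for (;;)
  {
    uint64_t start= reserved.fetch_add(size, std::memory_order_relaxed);
    if (start + size <= buf_size)
      return base_lsn + start;      // both ranges granted by one RMW
    reserved.fetch_sub(size, std::memory_order_relaxed); // toy back-off
    std::this_thread::yield();      // wait for the log writer to drain
  }
}

int main() { return append_prepare(100) == 8292 ? 0 : 1; }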
- @tparam spin whether to use the spin-only lock_lsn() @tparam mmap log_sys.is_mmap() @param size total length of the data to append(), in bytes @param ex whether log_sys.latch is exclusively locked @return the start LSN and the buffer position for append() */ - template + template std::pair append_prepare(size_t size, bool ex) noexcept; /** Append a string of bytes to the redo log. @@ -570,7 +558,10 @@ /** Wait for a log checkpoint if needed. NOTE that this function may only be called while not holding any synchronization objects except dict_sys.latch. */ -void log_free_check(); +void log_free_check() noexcept; + +/** @return the current log sequence number (may be stale) */ +lsn_t log_get_lsn() noexcept; /** Release the latches that protect log resizing. */ -void log_resize_release(); +void log_resize_release() noexcept; diff -Nru mariadb-10.11.11/storage/innobase/include/log0recv.h mariadb-10.11.13/storage/innobase/include/log0recv.h --- mariadb-10.11.11/storage/innobase/include/log0recv.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/log0recv.h 2025-05-19 16:14:25.000000000 +0000 @@ -118,15 +118,17 @@ const fil_space_t *space= nullptr, byte *tmp_buf= nullptr) const noexcept; - /** Find the doublewrite copy of an encrypted page with the - smallest FIL_PAGE_LSN that is large enough for recovery. + /** Find the doublewrite copy of an encrypted/page_compressed + page with the smallest FIL_PAGE_LSN that is large enough for + recovery. @param space tablespace object @param page_no page number to find - @param buf buffer for unencrypted page + @param buf buffer for unencrypted/uncompressed page @return buf @retval nullptr if the page was not found in doublewrite buffer */ - byte *find_encrypted_page(const fil_node_t &space, uint32_t page_no, - byte *buf) noexcept; + ATTRIBUTE_COLD byte *find_deferred_page(const fil_node_t &space, + uint32_t page_no, + byte *buf) noexcept; /** Restore the first page of the given tablespace from doublewrite buffer. diff -Nru mariadb-10.11.11/storage/innobase/include/mtr0mtr.h mariadb-10.11.13/storage/innobase/include/mtr0mtr.h --- mariadb-10.11.11/storage/innobase/include/mtr0mtr.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/mtr0mtr.h 2025-05-19 16:14:25.000000000 +0000 @@ -700,19 +700,19 @@ @param mtr mini-transaction @param lsns {start_lsn,flush_ahead} */ template - static void commit_log(mtr_t *mtr, std::pair lsns); + static void commit_log(mtr_t *mtr, std::pair lsns) + noexcept; /** Append the redo log records to the redo log buffer. @return {start_lsn,flush_ahead} */ std::pair do_write(); /** Append the redo log records to the redo log buffer. - @tparam spin whether to use the spin-only log_sys.lock_lsn() @tparam mmap log_sys.is_mmap() @param mtr mini-transaction @param len number of bytes to write @return {start_lsn,flush_ahead} */ - template static + template static std::pair finish_writer(mtr_t *mtr, size_t len); /** The applicable variant of commit_log() */ @@ -723,9 +723,6 @@ std::pair finish_write(size_t len) { return finisher(this, len); } public: - /** Poll interval in log_sys.lock_lsn(); 0 to use log_sys.lsn_lock. - Protected by LOCK_global_system_variables and log_sys.latch. */ - static unsigned spin_wait_delay; /** Update finisher when spin_wait_delay is changing to or from 0. 
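[Editor's sketch] With spin_wait_delay gone, finisher_update() merely picks between two instantiations of finish_writer (memory-mapped or file-backed log). The idiom, selecting a template instantiation once at configuration time through a function pointer instead of branching on every mini-transaction commit, shown in isolation with simplified signatures:

#include <cstdio>

struct mtr
{
  template <bool mmap> static int finish_writer(int len)
  {
    return mmap ? len : len + 1;   // placeholder bodies
  }
  static int (*finisher)(int);
  static void finisher_update(bool use_mmap)
  {
    finisher= use_mmap ? finish_writer<true> : finish_writer<false>;
  }
};
int (*mtr::finisher)(int)= mtr::finish_writer<false>;

int main()
{
  mtr::finisher_update(true);      // e.g. after the log layout changes
  std::printf("%d\n", mtr::finisher(10));
}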
*/ static void finisher_update(); private: diff -Nru mariadb-10.11.11/storage/innobase/include/os0file.h mariadb-10.11.13/storage/innobase/include/os0file.h --- mariadb-10.11.11/storage/innobase/include/os0file.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/os0file.h 2025-05-19 16:14:25.000000000 +0000 @@ -1003,6 +1003,8 @@ size_t os_aio_pending_reads_approx() noexcept; /** @return number of pending writes */ size_t os_aio_pending_writes() noexcept; +/** @return approximate number of pending writes */ +size_t os_aio_pending_writes_approx() noexcept; /** Wait until there are no pending asynchronous writes. @param declare whether the wait will be declared in tpool */ diff -Nru mariadb-10.11.11/storage/innobase/include/row0row.h mariadb-10.11.13/storage/innobase/include/row0row.h --- mariadb-10.11.11/storage/innobase/include/row0row.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/row0row.h 2025-05-19 16:14:25.000000000 +0000 @@ -328,22 +328,6 @@ mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/** Parse the integer data from specified data, which could be -DATA_INT, DATA_FLOAT or DATA_DOUBLE. If the value is less than 0 -and the type is not unsigned then we reset the value to 0 -@param[in] data data to read -@param[in] len length of data -@param[in] mtype mtype of data -@param[in] unsigned_type if the data is unsigned -@return the integer value from the data */ -inline -ib_uint64_t -row_parse_int( - const byte* data, - ulint len, - ulint mtype, - bool unsigned_type); - /** Result of row_search_index_entry */ enum row_search_result { ROW_FOUND = 0, /*!< the record was found */ diff -Nru mariadb-10.11.11/storage/innobase/include/row0row.inl mariadb-10.11.13/storage/innobase/include/row0row.inl --- mariadb-10.11.11/storage/innobase/include/row0row.inl 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/row0row.inl 2025-05-19 16:14:25.000000000 +0000 @@ -170,52 +170,3 @@ } } } - -/** Parse the integer data from specified data, which could be -DATA_INT, DATA_FLOAT or DATA_DOUBLE. 
If the value is less than 0 -and the type is not unsigned then we reset the value to 0 -@param[in] data data to read -@param[in] len length of data -@param[in] mtype mtype of data -@param[in] unsigned_type if the data is unsigned -@return the integer value from the data */ -ib_uint64_t -row_parse_int( - const byte* data, - ulint len, - ulint mtype, - bool unsigned_type) -{ - ib_uint64_t value = 0; - - switch (mtype) { - case DATA_INT: - - ut_a(len <= sizeof value); - value = mach_read_int_type(data, len, unsigned_type); - break; - - case DATA_FLOAT: - - ut_a(len == sizeof(float)); - value = static_cast<ib_uint64_t>(mach_float_read(data)); - break; - - case DATA_DOUBLE: - - ut_a(len == sizeof(double)); - value = static_cast<ib_uint64_t>(mach_double_read(data)); - break; - - default: - ut_error; - - } - - if (!unsigned_type && static_cast<int64_t>(value) < 0) { - value = 0; - } - - return(value); -} - diff -Nru mariadb-10.11.11/storage/innobase/include/row0sel.h mariadb-10.11.13/storage/innobase/include/row0sel.h --- mariadb-10.11.11/storage/innobase/include/row0sel.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/row0sel.h 2025-05-19 16:14:25.000000000 +0000 @@ -182,9 +182,8 @@ @param[in] index index starting with an AUTO_INCREMENT column @return the largest AUTO_INCREMENT value @retval 0 if no records were found */ -ib_uint64_t -row_search_max_autoinc(dict_index_t* index) - MY_ATTRIBUTE((nonnull, warn_unused_result)); +uint64_t row_search_max_autoinc(dict_index_t *index) noexcept + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** A structure for caching column values for prefetched rows */ struct sel_buf_t{ diff -Nru mariadb-10.11.11/storage/innobase/include/srv0srv.h mariadb-10.11.13/storage/innobase/include/srv0srv.h --- mariadb-10.11.11/storage/innobase/include/srv0srv.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/srv0srv.h 2025-05-19 16:14:25.000000000 +0000 @@ -223,17 +223,6 @@ extern my_bool srv_adaptive_flushing; extern my_bool srv_flush_sync; -/** Requested size in bytes */ -extern ulint srv_buf_pool_size; -/** Requested buffer pool chunk size */ -extern size_t srv_buf_pool_chunk_unit; -/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/ -/** Previously requested size */ -extern ulint srv_buf_pool_old_size; -/** Current size as scaling factor for the other components */ -extern ulint srv_buf_pool_base_size; -/** Current size in bytes */ -extern ulint srv_buf_pool_curr_size; /** Dump this % of each buffer pool during BP dump */ extern ulong srv_buf_pool_dump_pct; #ifdef UNIV_DEBUG @@ -267,8 +256,8 @@ /* We use this dummy default value at startup for max_io_capacity. The real value is set based on the value of io_capacity.
*/ -#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT (~0UL) -#define SRV_MAX_IO_CAPACITY_LIMIT (~0UL) +#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT (UINT32_MAX) +#define SRV_MAX_IO_CAPACITY_LIMIT (UINT32_MAX) extern ulong srv_max_io_capacity; /* The "innodb_stats_method" setting, decides how InnoDB is going @@ -294,9 +283,9 @@ extern ibool srv_innodb_status; -extern unsigned long long srv_stats_transient_sample_pages; +extern uint32_t srv_stats_transient_sample_pages; extern my_bool srv_stats_persistent; -extern unsigned long long srv_stats_persistent_sample_pages; +extern uint32_t srv_stats_persistent_sample_pages; extern my_bool srv_stats_auto_recalc; extern my_bool srv_stats_include_delete_marked; extern unsigned long long srv_stats_modified_counter; @@ -596,7 +585,7 @@ #endif /* BTR_CUR_HASH_ADAPT */ char innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */ char innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */ - char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */ + char innodb_buffer_pool_resize_status[65];/*!< Buf pool resize status */ my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */ ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */ ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */ diff -Nru mariadb-10.11.11/storage/innobase/include/trx0trx.h mariadb-10.11.13/storage/innobase/include/trx0trx.h --- mariadb-10.11.11/storage/innobase/include/trx0trx.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/trx0trx.h 2025-05-19 16:14:25.000000000 +0000 @@ -809,8 +809,13 @@ /** normally set; "SET unique_checks=0, foreign_key_checks=0" enables bulk insert into an empty table */ unsigned check_unique_secondary:1; - /** whether an insert into an empty table is active */ - unsigned bulk_insert:1; + /** whether an insert into an empty table is active + Possible states are + TRX_NO_BULK + TRX_DML_BULK + TRX_DDL_BULK + @see trx_bulk_insert in trx0types.h */ + unsigned bulk_insert:2; /*------------------------------*/ /* MySQL has a transaction coordinator to coordinate two phase commit between multiple storage engines and the binary log. When @@ -1117,6 +1122,7 @@ ut_ad(!is_not_inheriting_locks()); ut_ad(check_foreigns); ut_ad(check_unique_secondary); + ut_ad(bulk_insert == TRX_NO_BULK); } /** This has to be invoked on SAVEPOINT or at the end of a statement. @@ -1142,6 +1148,8 @@ rollback to the start of a statement will work. */ void end_bulk_insert() { + if (bulk_insert == TRX_DDL_BULK) + return; for (auto& t : mod_tables) t.second.end_bulk_insert(); } @@ -1149,7 +1157,15 @@ /** @return whether a bulk insert into empty table is in progress */ bool is_bulk_insert() const { - if (!bulk_insert || check_unique_secondary || check_foreigns) + switch (bulk_insert) { + case TRX_NO_BULK: + return false; + case TRX_DDL_BULK: + return true; + default: + ut_ad(bulk_insert == TRX_DML_BULK); + } + if (check_unique_secondary || check_foreigns) return false; for (const auto& t : mod_tables) if (t.second.is_bulk_insert()) @@ -1179,9 +1195,11 @@ /** Do the bulk insert for the buffered insert operation for the transaction. @return DB_SUCCESS or error code */ + template dberr_t bulk_insert_apply() { - return UNIV_UNLIKELY(bulk_insert) ? bulk_insert_apply_low(): DB_SUCCESS; + static_assert(type != TRX_NO_BULK, ""); + return bulk_insert == type ? 
bulk_insert_apply_low(): DB_SUCCESS; } private: diff -Nru mariadb-10.11.11/storage/innobase/include/trx0types.h mariadb-10.11.13/storage/innobase/include/trx0types.h --- mariadb-10.11.11/storage/innobase/include/trx0types.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/trx0types.h 2025-05-19 16:14:25.000000000 +0000 @@ -65,6 +65,15 @@ TRX_STATE_COMMITTED_IN_MEMORY }; +/** Transaction bulk insert operation @see trx_t::bulk_insert */ +enum trx_bulk_insert { + TRX_NO_BULK, + /** bulk insert is being executed during DML */ + TRX_DML_BULK, + /** bulk insert is being executed in copy_data_between_tables() */ + TRX_DDL_BULK +}; + /** Memory objects */ /* @{ */ /** Transaction */ diff -Nru mariadb-10.11.11/storage/innobase/include/ut0new.h mariadb-10.11.13/storage/innobase/include/ut0new.h --- mariadb-10.11.11/storage/innobase/include/ut0new.h 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/include/ut0new.h 2025-05-19 16:14:25.000000000 +0000 @@ -277,7 +277,6 @@ #ifdef UNIV_PFS_MEMORY /** Default constructor. */ - explicit ut_allocator(PSI_memory_key key = PSI_NOT_INSTRUMENTED) : m_key(key) { diff -Nru mariadb-10.11.11/storage/innobase/lock/lock0lock.cc mariadb-10.11.13/storage/innobase/lock/lock0lock.cc --- mariadb-10.11.11/storage/innobase/lock/lock0lock.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/lock/lock0lock.cc 2025-05-19 16:14:25.000000000 +0000 @@ -4140,13 +4140,12 @@ children.end()) continue; /* We already acquired MDL on this child table. */ MDL_ticket *mdl= nullptr; - child->acquire(); child= dict_acquire_mdl_shared(child, mdl_context, &mdl, DICT_TABLE_OP_NORMAL); if (child) { - if (!mdl) - child->release(); + if (mdl) + child->acquire(); children.emplace_back(table_mdl{child, mdl}); goto rescan; } @@ -6053,17 +6052,10 @@ for it */ trx_t *trx = thr_get_trx(thr); - if (const trx_t *owner = - lock_rec_convert_impl_to_expl(trx, *block, - rec, index, offsets)) { - if (owner == trx) { - /* We already hold an exclusive lock. */ - return DB_SUCCESS; - } - - if (trx->snapshot_isolation && trx->read_view.is_open()) { - return DB_RECORD_CHANGED; - } + if (lock_rec_convert_impl_to_expl(trx, *block, + rec, index, offsets) == trx) { + /* We already hold an exclusive lock. */ + return DB_SUCCESS; } err = lock_rec_lock(true, LOCK_X | LOCK_REC_NOT_GAP, @@ -6225,19 +6217,11 @@ return DB_SUCCESS; } - if (page_rec_is_supremum(rec)) { - } else if (const trx_t *owner = - lock_rec_convert_impl_to_expl(trx, *block, - rec, index, offsets)) { - if (owner == trx) { - if (gap_mode == LOCK_REC_NOT_GAP) { - /* We already hold an exclusive lock. */ - return DB_SUCCESS; - } - } else if (trx->snapshot_isolation - && trx->read_view.is_open()) { - return DB_RECORD_CHANGED; - } + if (!page_rec_is_supremum(rec) + && lock_rec_convert_impl_to_expl(trx, *block, rec, index, + offsets) == trx + && gap_mode == LOCK_REC_NOT_GAP) { + return DB_SUCCESS; } #ifdef WITH_WSREP @@ -6317,28 +6301,24 @@ trx_t *trx = thr_get_trx(thr); if (lock_table_has(trx, index->table, LOCK_X) || heap_no == PAGE_HEAP_NO_SUPREMUM) { - } else if (const trx_t *owner = - lock_rec_convert_impl_to_expl(trx, *block, - rec, index, offsets)) { - if (owner == trx) { - if (gap_mode == LOCK_REC_NOT_GAP) { - /* We already hold an exclusive lock. 
*/ - return DB_SUCCESS; - } - } else if (trx->snapshot_isolation - && trx->read_view.is_open()) { - return DB_RECORD_CHANGED; - } + } else if (lock_rec_convert_impl_to_expl(trx, *block, rec, index, + offsets) == trx + && gap_mode == LOCK_REC_NOT_GAP) { + /* We already hold an exclusive lock. */ + return DB_SUCCESS; } if (heap_no > PAGE_HEAP_NO_SUPREMUM && gap_mode != LOCK_GAP && trx->snapshot_isolation - && trx->read_view.is_open() - && !trx->read_view.changes_visible( - trx_read_trx_id(rec + row_trx_id_offset(rec, index))) - && IF_WSREP(!(trx->is_wsrep() + && trx->read_view.is_open()) { + trx_id_t trx_id= trx_read_trx_id(rec + + row_trx_id_offset(rec, index)); + if (!trx_sys.is_registered(trx, trx_id) + && !trx->read_view.changes_visible(trx_id) + && IF_WSREP(!(trx->is_wsrep() && wsrep_thd_skip_locking(trx->mysql_thd)), true)) { - return DB_RECORD_CHANGED; + return DB_RECORD_CHANGED; + } } dberr_t err = lock_rec_lock(false, gap_mode | mode, @@ -7109,10 +7089,6 @@ victim->lock.was_chosen_as_deadlock_victim= true; DEBUG_SYNC_C("deadlock_report_before_lock_releasing"); lock_cancel_waiting_and_release(victim->lock.wait_lock); -#ifdef WITH_WSREP - if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd)) - wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd); -#endif } func_exit: diff -Nru mariadb-10.11.11/storage/innobase/log/log0crypt.cc mariadb-10.11.13/storage/innobase/log/log0crypt.cc --- mariadb-10.11.11/storage/innobase/log/log0crypt.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/log/log0crypt.cc 2025-05-19 16:14:25.000000000 +0000 @@ -566,7 +566,7 @@ alignas(8) byte iv[MY_AES_BLOCK_SIZE]; - m_commit_lsn= log_sys.get_lsn(); + m_commit_lsn= log_sys.get_flushed_lsn(); ut_ad(m_commit_lsn); byte *tmp= static_cast<byte*>(alloca(srv_page_size)), *t= tmp; byte *dst= static_cast<byte*>(alloca(srv_page_size)); diff -Nru mariadb-10.11.11/storage/innobase/log/log0log.cc mariadb-10.11.13/storage/innobase/log/log0log.cc --- mariadb-10.11.11/storage/innobase/log/log0log.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/log/log0log.cc 2025-05-19 16:14:25.000000000 +0000 @@ -68,7 +68,7 @@ #define LOG_BUF_FLUSH_MARGIN ((4 * 4096) /* cf.
log_t::append_prepare() */ \ + (4U << srv_page_size_shift)) -void log_t::set_capacity() +void log_t::set_capacity() noexcept { ut_ad(log_sys.latch_have_wr()); /* Margin for the free space in the smallest log, before a new query @@ -87,13 +87,15 @@ log_sys.max_checkpoint_age = margin; } -void log_t::create() +void log_t::create() noexcept { ut_ad(this == &log_sys); ut_ad(!is_initialised()); + latch.SRW_LOCK_INIT(log_latch_key); + write_lsn_offset= 0; /* LSN 0 and 1 are reserved; @see buf_page_t::oldest_modification_ */ - lsn.store(FIRST_LSN, std::memory_order_relaxed); + base_lsn.store(FIRST_LSN, std::memory_order_relaxed); flushed_to_disk_lsn.store(FIRST_LSN, std::memory_order_relaxed); need_checkpoint.store(true, std::memory_order_relaxed); write_lsn= FIRST_LSN; @@ -102,10 +104,10 @@ ut_ad(!buf); ut_ad(!flush_buf); ut_ad(!writer); - max_buf_free= 1; - latch.SRW_LOCK_INIT(log_latch_key); - lsn_lock.init(); +#ifdef HAVE_PMEM + resize_wrap_mutex.init(); +#endif last_checkpoint_lsn= FIRST_LSN; log_capacity= 0; @@ -114,8 +116,6 @@ next_checkpoint_lsn= 0; checkpoint_pending= false; - set_buf_free(0); - ut_ad(is_initialised()); } @@ -306,7 +306,7 @@ #if defined __linux__ || defined _WIN32 /** Display a message about opening the log */ -ATTRIBUTE_COLD static void log_file_message() +ATTRIBUTE_COLD static void log_file_message() noexcept { sql_print_information("InnoDB: %s (block size=%u bytes)", log_sys.log_mmap @@ -320,10 +320,10 @@ log_sys.write_size); } #else -static inline void log_file_message() {} +static inline void log_file_message() noexcept {} #endif -bool log_t::attach(log_file_t file, os_offset_t size) +bool log_t::attach(log_file_t file, os_offset_t size) noexcept { log= file; ut_ad(!size || size >= START_OFFSET + SIZE_OF_FILE_CHECKPOINT); @@ -352,8 +352,7 @@ } # endif buf= static_cast(ptr); - max_buf_free= 1; - writer_update(); + writer_update(false); # ifdef HAVE_PMEM if (is_pmem) return true; @@ -366,7 +365,7 @@ if (!buf) { alloc_fail: - max_buf_free= 0; + base_lsn.store(0, std::memory_order_relaxed); sql_print_error("InnoDB: Cannot allocate memory;" " too large innodb_log_buffer_size?"); return false; @@ -394,8 +393,7 @@ TRASH_ALLOC(buf, buf_size); TRASH_ALLOC(flush_buf, buf_size); - max_buf_free= buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; - writer_update(); + writer_update(false); memset_aligned<512>(checkpoint_buf, 0, write_size); func_exit: @@ -407,7 +405,7 @@ @param buf log header buffer @param lsn log sequence number corresponding to log_sys.START_OFFSET @param encrypted whether the log is encrypted */ -void log_t::header_write(byte *buf, lsn_t lsn, bool encrypted) +void log_t::header_write(byte *buf, lsn_t lsn, bool encrypted) noexcept { mach_write_to_4(my_assume_aligned<4>(buf) + LOG_HEADER_FORMAT, log_sys.FORMAT_10_8); @@ -436,8 +434,9 @@ ut_ad(is_latest()); ut_ad(this == &log_sys); - this->lsn.store(lsn, std::memory_order_relaxed); - this->flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); + write_lsn_offset= 0; + base_lsn.store(lsn, std::memory_order_relaxed); + flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); first_lsn= lsn; write_lsn= lsn; @@ -452,14 +451,13 @@ mprotect(buf, size_t(file_size), PROT_READ | PROT_WRITE); memset_aligned<4096>(buf, 0, 4096); log_sys.header_write(buf, lsn, is_encrypted()); - set_buf_free(START_OFFSET); pmem_persist(buf, 512); + buf_size= unsigned(std::min(capacity(), buf_size_max)); } else #endif { ut_ad(!is_mmap()); - set_buf_free(0); memset_aligned<4096>(flush_buf, 0, buf_size); memset_aligned<4096>(buf, 0, 
buf_size); log_sys.header_write(buf, lsn, is_encrypted()); @@ -468,12 +466,12 @@ } } -ATTRIBUTE_COLD static void log_close_failed(dberr_t err) +ATTRIBUTE_COLD static void log_close_failed(dberr_t err) noexcept { ib::fatal() << "closing ib_logfile0 failed: " << err; } -void log_t::close_file(bool really_close) +void log_t::close_file(bool really_close) noexcept { if (is_mmap()) { @@ -508,16 +506,25 @@ log_close_failed(err); } +/** @return the current log sequence number (may be stale) */ +lsn_t log_get_lsn() noexcept +{ + log_sys.latch.wr_lock(SRW_LOCK_CALL); + lsn_t lsn= log_sys.get_lsn(); + log_sys.latch.wr_unlock(); + return lsn; +} + /** Acquire all latches that protect the log. */ -static void log_resize_acquire() +static void log_resize_acquire() noexcept { #ifdef HAVE_PMEM if (!log_sys.is_mmap()) #endif { - while (flush_lock.acquire(log_sys.get_lsn() + 1, nullptr) != + while (flush_lock.acquire(log_get_lsn() + 1, nullptr) != group_commit_lock::ACQUIRED); - while (write_lock.acquire(log_sys.get_lsn() + 1, nullptr) != + while (write_lock.acquire(log_get_lsn() + 1, nullptr) != group_commit_lock::ACQUIRED); } @@ -525,7 +532,7 @@ } /** Release the latches that protect the log. */ -void log_resize_release() +void log_resize_release() noexcept { log_sys.latch.wr_unlock(); @@ -542,7 +549,7 @@ #if defined __linux__ || defined _WIN32 /** Try to enable or disable file system caching (update log_buffered) */ -void log_t::set_buffered(bool buffered) +void log_t::set_buffered(bool buffered) noexcept { if (!log_maybe_unbuffered || #ifdef HAVE_PMEM @@ -570,31 +577,35 @@ /** Start resizing the log and release the exclusive latch. @param size requested new file_size +@param thd the current thread identifier @return whether the resizing was started successfully */ -log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept +log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) + noexcept { ut_ad(size >= 4U << 20); ut_ad(!(size & 4095)); ut_ad(!srv_read_only_mode); + ut_ad(thd); log_resize_acquire(); - resize_start_status status= RESIZE_NO_CHANGE; - lsn_t start_lsn{0}; -#ifdef HAVE_PMEM - bool is_pmem{false}; -#endif + resize_start_status status; - if (resize_in_progress()) + if (size == file_size) + status= RESIZE_NO_CHANGE; + else if (resize_in_progress()) status= RESIZE_IN_PROGRESS; - else if (size != file_size) + else { + lsn_t start_lsn; ut_ad(!resize_in_progress()); ut_ad(!resize_log.is_opened()); ut_ad(!resize_buf); ut_ad(!resize_flush_buf); + ut_ad(!resize_initiator); std::string path{get_log_file_path("ib_logfile101")}; bool success; + resize_initiator= thd; resize_lsn.store(1, std::memory_order_relaxed); resize_target= 0; resize_log.m_file= @@ -612,6 +623,7 @@ #ifdef HAVE_PMEM else if (is_mmap()) { + bool is_pmem{false}; ptr= ::log_mmap(resize_log.m_file, is_pmem, size); if (ptr == MAP_FAILED) @@ -661,34 +673,33 @@ else if (!is_opened()) resize_log.close(); - writer_update(); + resize_lsn.store(start_lsn, std::memory_order_relaxed); + writer_update(true); + log_resize_release(); + + mysql_mutex_lock(&buf_pool.flush_list_mutex); + lsn_t target_lsn= buf_pool.get_oldest_modification(0); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + buf_flush_ahead(start_lsn < target_lsn ? target_lsn + 1 : start_lsn, + false); + return RESIZE_STARTED; } - status= success ? 
RESIZE_STARTED : RESIZE_FAILED; } - resize_lsn.store(start_lsn, std::memory_order_relaxed); + resize_initiator= nullptr; + resize_lsn.store(0, std::memory_order_relaxed); + status= RESIZE_FAILED; } log_resize_release(); - - if (start_lsn) - { - mysql_mutex_lock(&buf_pool.flush_list_mutex); - lsn_t target_lsn= buf_pool.get_oldest_modification(0); - if (start_lsn < target_lsn) - start_lsn= target_lsn + 1; - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - buf_flush_ahead(start_lsn, false); - } - return status; } -/** Abort log resizing. */ -void log_t::resize_abort() noexcept +/** Abort a resize_start() that we started. */ +void log_t::resize_abort(void *thd) noexcept { log_resize_acquire(); - if (resize_in_progress() > 1) + if (resize_running(thd)) { #ifdef HAVE_PMEM const bool is_mmap{this->is_mmap()}; @@ -715,11 +726,12 @@ resize_buf= nullptr; resize_target= 0; resize_lsn.store(0, std::memory_order_relaxed); + resize_initiator= nullptr; std::string path{get_log_file_path("ib_logfile101")}; IF_WIN(DeleteFile(path.c_str()), unlink(path.c_str())); + writer_update(false); } - writer_update(); log_resize_release(); } @@ -882,9 +894,7 @@ ut_ad(!is_opened()); ut_ad(!write_lock.is_owner()); ut_ad(!flush_lock.is_owner()); -#ifdef LOG_LATCH_DEBUG - ut_ad(latch_have_any()); -#endif + ut_ad(latch_have_wr()); lsn_t old= flushed_to_disk_lsn.load(std::memory_order_relaxed); @@ -902,26 +912,26 @@ else pmem_persist(buf + start, end - start); - old= flushed_to_disk_lsn.load(std::memory_order_relaxed); - - if (old < lsn) - { - while (!flushed_to_disk_lsn.compare_exchange_weak - (old, lsn, std::memory_order_release, std::memory_order_relaxed)) - if (old >= lsn) - break; - - log_flush_notify(lsn); - DBUG_EXECUTE_IF("crash_after_log_write_upto", DBUG_SUICIDE();); - } + uint64_t offset{write_lsn_offset}; + const lsn_t new_base_lsn= base_lsn.load(std::memory_order_relaxed) + + (offset & (WRITE_BACKOFF - 1)); + ut_ad(new_base_lsn >= lsn); + write_to_buf+= size_t(offset >> WRITE_TO_BUF_SHIFT); + /* This synchronizes with get_lsn_approx(); + we must store write_lsn_offset before base_lsn. */ + write_lsn_offset.store(0, std::memory_order_relaxed); + base_lsn.store(new_base_lsn, std::memory_order_release); + flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); + log_flush_notify(lsn); + DBUG_EXECUTE_IF("crash_after_log_write_upto", DBUG_SUICIDE();); } ATTRIBUTE_NOINLINE static void log_write_persist(lsn_t lsn) noexcept { - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); log_sys.persist(lsn); - log_sys.latch.rd_unlock(); + log_sys.latch.wr_unlock(); } #endif @@ -972,7 +982,7 @@ ut_ad(resizing == RETAIN_LATCH || (resizing == RESIZING) == (resize_in_progress() > 1)); - const lsn_t lsn{get_lsn(std::memory_order_relaxed)}; + const lsn_t lsn{get_lsn()}; if (write_lsn >= lsn) { @@ -988,7 +998,8 @@ ut_ad(write_lsn >= get_flushed_lsn()); const size_t write_size_1{write_size - 1}; ut_ad(ut_is_2pow(write_size)); - size_t length{buf_free.load(std::memory_order_relaxed)}; + lsn_t base= base_lsn.load(std::memory_order_relaxed); + size_t length{size_t(lsn - base)}; lsn_t offset{calc_lsn_offset(write_lsn)}; ut_ad(length >= (offset & write_size_1)); ut_ad(write_size_1 >= 511); @@ -1010,14 +1021,8 @@ { ut_ad(!((length ^ (size_t(lsn) - size_t(first_lsn))) & write_size_1)); /* Keep filling the same buffer until we have more than one block. */ -#if 0 /* TODO: Pad the last log block with dummy records. */ - buf_free= log_pad(lsn, (write_size_1 + 1) - length, - buf + length, flush_buf); - ... 
/* TODO: Update the LSN and adjust other code. */ -#else MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - length); buf[length]= 0; /* ensure that recovery catches EOF */ -#endif if (UNIV_LIKELY_NULL(re_write_buf)) { MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - length); @@ -1028,8 +1033,13 @@ else { const size_t new_buf_free{length & write_size_1}; + base+= length & ~write_size_1; ut_ad(new_buf_free == ((lsn - first_lsn) & write_size_1)); - buf_free.store(new_buf_free, std::memory_order_relaxed); + write_to_buf+= size_t(write_lsn_offset >> WRITE_TO_BUF_SHIFT); + /* This synchronizes with get_lsn_approx(); + we must store write_lsn_offset before base_lsn. */ + write_lsn_offset.store(new_buf_free, std::memory_order_relaxed); + base_lsn.store(base, std::memory_order_release); if (new_buf_free) { @@ -1039,12 +1049,13 @@ the current LSN are generated. */ MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free); buf[length]= 0; /* allow recovery to catch EOF faster */ + if (UNIV_LIKELY_NULL(re_write_buf)) + MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - + new_buf_free); length&= ~write_size_1; memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15); if (UNIV_LIKELY_NULL(re_write_buf)) { - MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - - new_buf_free); memcpy_aligned<16>(resize_flush_buf, re_write_buf + length, (new_buf_free + 15) & ~15); re_write_buf[length + new_buf_free]= 0; @@ -1057,7 +1068,9 @@ std::swap(resize_buf, resize_flush_buf); } + ut_ad(base + (write_lsn_offset & (WRITE_TO_BUF - 1)) == lsn); write_to_log++; + if (resizing != RETAIN_LATCH) latch.wr_unlock(); @@ -1101,7 +1114,7 @@ @retval 0 if there are no pending callbacks on flush_lock or there is another group commit lead. */ -static lsn_t log_flush(lsn_t lsn) +static lsn_t log_flush(lsn_t lsn) noexcept { ut_ad(!log_sys.is_mmap()); ut_a(log_sys.flush(lsn)); @@ -1120,7 +1133,7 @@ void log_write_up_to(lsn_t lsn, bool durable, const completion_callback *callback) noexcept { - ut_ad(!srv_read_only_mode || log_sys.buf_free_ok()); + ut_ad(!srv_read_only_mode); ut_ad(lsn != LSN_MAX); ut_ad(lsn != 0); ut_ad(!log_sys.is_mmap() || !callback || durable); @@ -1133,8 +1146,6 @@ return; } - ut_ad(lsn <= log_sys.get_lsn()); - #ifdef HAVE_PMEM if (log_sys.is_mmap()) { @@ -1151,10 +1162,10 @@ if (flush_lock.acquire(lsn, callback) != group_commit_lock::ACQUIRED) return; /* Promise to other concurrent flush_lock.acquire() that we - will durable at least up to the current LSN. The LSN may still - advance until we acquire log_sys.latch below. */ - lsn= log_sys.get_lsn(); - flush_lock.set_pending(lsn); + will be durable at least up to the current LSN. The LSN may still + advance when we acquire log_sys.latch below. */ + if (lsn > log_sys.get_flushed_lsn()) + flush_lock.set_pending(lsn); } lsn_t pending_write_lsn= 0, pending_flush_lsn= 0; @@ -1190,42 +1201,50 @@ return log_sys.write_buf(); } -void log_t::writer_update() noexcept +void log_t::writer_update(bool resizing) noexcept { ut_ad(latch_have_wr()); - writer= resize_in_progress() ? log_writer_resizing : log_writer; + ut_ad(resizing == (resize_in_progress() > 1)); + writer= resizing ? log_writer_resizing : log_writer; mtr_t::finisher_update(); } /** Write to the log file up to the last log entry. 
@param durable whether to wait for a durable write to complete */ -void log_buffer_flush_to_disk(bool durable) +void log_buffer_flush_to_disk(bool durable) noexcept { - log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), durable); + log_write_up_to(log_get_lsn(), durable); } /** Prepare to invoke log_write_and_flush(), before acquiring log_sys.latch. */ -ATTRIBUTE_COLD void log_write_and_flush_prepare() +ATTRIBUTE_COLD void log_write_and_flush_prepare() noexcept { #ifdef HAVE_PMEM if (log_sys.is_mmap()) return; #endif - while (flush_lock.acquire(log_sys.get_lsn() + 1, nullptr) != + while (flush_lock.acquire(log_get_lsn() + 1, nullptr) != group_commit_lock::ACQUIRED); - while (write_lock.acquire(log_sys.get_lsn() + 1, nullptr) != + while (write_lock.acquire(log_get_lsn() + 1, nullptr) != group_commit_lock::ACQUIRED); } -void log_t::clear_mmap() +void log_t::clear_mmap() noexcept { - if (!is_mmap() || + if (!is_mmap() || high_level_read_only) + return; #ifdef HAVE_PMEM - !is_opened() || -#endif - high_level_read_only) + if (!is_opened()) + { + ut_d(latch.wr_lock(SRW_LOCK_CALL)); + ut_ad(!resize_in_progress()); + ut_ad(get_lsn() == get_flushed_lsn(std::memory_order_relaxed)); + ut_d(latch.wr_unlock()); return; + } +#endif + log_resize_acquire(); ut_ad(!resize_in_progress()); ut_ad(write_lsn == get_lsn()); @@ -1235,10 +1254,10 @@ { alignas(16) byte log_block[4096]; const size_t bs{write_size}; - const size_t bf{buf_free.load(std::memory_order_relaxed)}; { - byte *const b= buf; - memcpy_aligned<16>(log_block, b + (bf & ~(bs - 1)), bs); + const size_t bf= + size_t(write_lsn - base_lsn.load(std::memory_order_relaxed)); + memcpy_aligned<16>(log_block, buf + (bf & ~(bs - 1)), bs); } close_file(false); @@ -1246,14 +1265,13 @@ ut_a(attach(log, file_size)); ut_ad(!is_mmap()); - set_buf_free(bf & (bs - 1)); - memcpy_aligned<16>(log_sys.buf, log_block, bs); + memcpy_aligned<16>(buf, log_block, bs); } log_resize_release(); } /** Durably write the log up to log_sys.get_lsn(). */ -ATTRIBUTE_COLD void log_write_and_flush() +ATTRIBUTE_COLD void log_write_and_flush() noexcept { ut_ad(!srv_read_only_mode); #ifdef HAVE_PMEM @@ -1273,17 +1291,17 @@ that a new log entry can be catenated without an immediate need for a checkpoint. NOTE: this function may only be called if the calling thread owns no synchronization objects! */ -ATTRIBUTE_COLD static void log_checkpoint_margin() +ATTRIBUTE_COLD static void log_checkpoint_margin() noexcept { while (log_sys.check_for_checkpoint()) { - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); ut_ad(!recv_no_log_write); if (!log_sys.check_for_checkpoint()) { func_exit: - log_sys.latch.rd_unlock(); + log_sys.latch.wr_unlock(); return; } @@ -1301,7 +1319,7 @@ } DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto skip_checkpoint;); - log_sys.latch.rd_unlock(); + log_sys.latch.wr_unlock(); /* We must wait to prevent the tail of the log overwriting the head. */ buf_flush_wait_flushed(std::min(sync_lsn, checkpoint + (1U << 20))); @@ -1313,7 +1331,7 @@ /** Wait for a log checkpoint if needed. NOTE that this function may only be called while not holding any synchronization objects except dict_sys.latch. 
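[Editor's sketch] log_free_check() guards against the redo log tail overwriting its own head: when the distance from the last checkpoint approaches the log capacity, the caller must first wait for page flushing and checkpointing to catch up. A schematic of the margin loop, with all constants and bookkeeping invented for the sketch:

#include <cstdint>

using lsn_t= uint64_t;

lsn_t lsn= 50 << 20;            // current end of the redo log
lsn_t checkpoint= 10 << 20;     // oldest LSN still needed for recovery
constexpr lsn_t capacity= 32 << 20;

void buf_flush_wait(lsn_t) { /* pretend dirty pages reached the disk */ }
void make_checkpoint(lsn_t target) { checkpoint= target; }

void free_check()
{
  while (lsn - checkpoint >= capacity)
  {
    lsn_t target= checkpoint + (1U << 20);  // advance in 1 MiB steps
    buf_flush_wait(target);
    make_checkpoint(target);
  }
}

int main() { free_check(); return checkpoint > (10 << 20) ? 0 : 1; }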
*/ -void log_free_check() +void log_free_check() noexcept { ut_ad(!lock_sys.is_holder()); if (log_sys.check_for_checkpoint()) @@ -1323,10 +1341,14 @@ } } -extern void buf_resize_shutdown(); +#ifdef __linux__ +extern void buf_mem_pressure_shutdown() noexcept; +#else +inline void buf_mem_pressure_shutdown() noexcept {} +#endif /** Make a checkpoint at the latest lsn on shutdown. */ -ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() +ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept { lsn_t lsn; ulint count = 0; @@ -1341,8 +1363,7 @@ srv_master_timer.reset(); } - /* Wait for the end of the buffer resize task.*/ - buf_resize_shutdown(); + buf_mem_pressure_shutdown(); dict_stats_shutdown(); btr_defragment_shutdown(); @@ -1464,7 +1485,7 @@ ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT; - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); lsn = log_sys.get_lsn(); @@ -1472,7 +1493,7 @@ && lsn != log_sys.last_checkpoint_lsn + sizeof_cp; ut_ad(lsn >= log_sys.last_checkpoint_lsn); - log_sys.latch.rd_unlock(); + log_sys.latch.wr_unlock(); if (lsn_changed) { goto loop; @@ -1490,7 +1511,7 @@ "Free innodb buffer pool"); ut_d(buf_pool.assert_all_freed()); - ut_a(lsn == log_sys.get_lsn() + ut_a(lsn == log_get_lsn() || srv_force_recovery == SRV_FORCE_NO_LOG_REDO); if (UNIV_UNLIKELY(lsn < recv_sys.lsn)) { @@ -1504,7 +1525,7 @@ /* Make some checks that the server really is quiet */ ut_ad(!srv_any_background_activity()); - ut_a(lsn == log_sys.get_lsn() + ut_a(lsn == log_get_lsn() || srv_force_recovery == SRV_FORCE_NO_LOG_REDO); } @@ -1515,44 +1536,42 @@ /*======*/ FILE* file) /*!< in: file where to print */ { - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); const lsn_t lsn= log_sys.get_lsn(); mysql_mutex_lock(&buf_pool.flush_list_mutex); const lsn_t pages_flushed = buf_pool.get_oldest_modification(lsn); mysql_mutex_unlock(&buf_pool.flush_list_mutex); + const lsn_t flushed_lsn{log_sys.get_flushed_lsn()}; + const lsn_t checkpoint_lsn{log_sys.last_checkpoint_lsn}; + log_sys.latch.wr_unlock(); fprintf(file, "Log sequence number " LSN_PF "\n" "Log flushed up to " LSN_PF "\n" "Pages flushed up to " LSN_PF "\n" "Last checkpoint at " LSN_PF "\n", - lsn, - log_sys.get_flushed_lsn(), - pages_flushed, - lsn_t{log_sys.last_checkpoint_lsn}); - - log_sys.latch.rd_unlock(); + lsn, flushed_lsn, pages_flushed, checkpoint_lsn); } /** Shut down the redo log subsystem. 
*/ void log_t::close() { ut_ad(this == &log_sys); - ut_ad(!(buf_free & buf_free_LOCK)); if (!is_initialised()) return; close_file(); ut_ad(!checkpoint_buf); ut_ad(!buf); ut_ad(!flush_buf); + base_lsn.store(0, std::memory_order_relaxed); latch.destroy(); - lsn_lock.destroy(); +#ifdef HAVE_PMEM + resize_wrap_mutex.destroy(); +#endif recv_sys.close(); - - max_buf_free= 0; } std::string get_log_file_path(const char *filename) diff -Nru mariadb-10.11.11/storage/innobase/log/log0recv.cc mariadb-10.11.13/storage/innobase/log/log0recv.cc --- mariadb-10.11.11/storage/innobase/log/log0recv.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/log/log0recv.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1266,6 +1266,13 @@ } else if (p.second // the first FILE_MODIFY or FILE_RENAME || f.name != fname.name) { reload: + if (f.name.size() == 0) { + /* Augment the recv_spaces.emplace_hint() for the + FILE_MODIFY record that had been added by + recv_sys_t::parse() */ + f.name = fname.name; + } + fil_space_t* space; /* Check if the tablespace file exists and contains @@ -1466,6 +1473,7 @@ mysql_mutex_lock(&mutex); recovery_on= false; + recv_needed_recovery= false; pages.clear(); pages_it= pages.end(); @@ -1473,7 +1481,6 @@ log_sys.clear_mmap(); } - /** Free a redo log snippet. @param data buffer allocated in add() */ inline void recv_sys_t::free(const void *data) @@ -1481,34 +1488,18 @@ ut_ad(!ut_align_offset(data, ALIGNMENT)); mysql_mutex_assert_owner(&mutex); - /* MDEV-14481 FIXME: To prevent race condition with buf_pool.resize(), - we must acquire and hold the buffer pool mutex here. */ - ut_ad(!buf_pool.resize_in_progress()); - - auto *chunk= buf_pool.chunks; - for (auto i= buf_pool.n_chunks; i--; chunk++) + buf_block_t *block= buf_pool.block_from(data); + ut_ad(block->page.frame == page_align(data)); + ut_ad(block->page.state() == buf_page_t::MEMORY); + ut_ad(uint16_t(block->page.free_offset - 1) < srv_page_size); + ut_ad(block->page.used_records); + if (!--block->page.used_records) { - if (data < chunk->blocks->page.frame) - continue; - const size_t offs= (reinterpret_cast(data) - - chunk->blocks->page.frame) >> srv_page_size_shift; - if (offs >= chunk->size) - continue; - buf_block_t *block= &chunk->blocks[offs]; - ut_ad(block->page.frame == page_align(data)); - ut_ad(block->page.state() == buf_page_t::MEMORY); - ut_ad(uint16_t(block->page.free_offset - 1) < srv_page_size); - ut_ad(block->page.used_records); - if (!--block->page.used_records) - { - block->page.hash= nullptr; - UT_LIST_REMOVE(blocks, block); - MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); - buf_block_free(block); - } - return; + block->page.hash= nullptr; + UT_LIST_REMOVE(blocks, block); + MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); + buf_block_free(block); } - ut_ad(0); } @@ -2057,12 +2048,13 @@ { mysql_mutex_unlock(&mutex); os_aio_wait_until_no_pending_reads(false); + os_aio_wait_until_no_pending_writes(false); mysql_mutex_lock(&mutex); garbage_collect(); mysql_mutex_lock(&buf_pool.mutex); - bool need_more= UT_LIST_GET_LEN(buf_pool.free) < pages; + const size_t available= UT_LIST_GET_LEN(buf_pool.free); mysql_mutex_unlock(&buf_pool.mutex); - if (need_more) + if (available < pages) buf_flush_sync_batch(lsn); } @@ -2507,9 +2499,11 @@ ut_ad(log_sys.is_latest()); alignas(8) byte iv[MY_AES_BLOCK_SIZE]; - byte *decrypt_buf= storing != BACKUP - ? static_cast(alloca(srv_page_size)) : nullptr; - + byte *decrypt_buf= + static_cast(alloca(storing == BACKUP + ? 
1/*type,length*/ + 5/*space_id*/ + + 5/*page_no*/ + 1/*rlen*/ + : srv_page_size)); const lsn_t start_lsn{lsn}; /* Check that the entire mini-transaction is included within the buffer */ @@ -2599,7 +2593,10 @@ ut_d(std::set modified); #endif - uint32_t space_id= 0, page_no= 0, last_offset= 0; + uint32_t space_id= 0, page_no= 0; + /* The end offset the last write (always 0 in storing==BACKUP). + The value 1 means that no "same page" record is allowed. */ + uint last_offset= 0; bool got_page_op= false; for (l= begin;; l+= rlen) @@ -2712,8 +2709,7 @@ { mach_write_to_4(iv + 8, space_id); mach_write_to_4(iv + 12, page_no); - byte eb[1/*type,length*/ + 5/*space_id*/ + 5/*page_no*/ + 1/*rlen*/]; - if (*l.copy_if_needed(iv, eb, recs, 1) == TRIM_PAGES) + if (*l.copy_if_needed(iv, decrypt_buf, recs, 1) == TRIM_PAGES) undo_space_trunc(space_id); } continue; @@ -2726,8 +2722,8 @@ if (i != recv_spaces.end() && i->first == space_id); else if (lsn < file_checkpoint) /* We have not seen all records between the checkpoint and - FILE_CHECKPOINT. There should be a FILE_DELETE for this - tablespace later. */ + FILE_CHECKPOINT. There should be a FILE_DELETE or FILE_MODIFY + for this tablespace later, to be handled in fil_name_process(). */ recv_spaces.emplace_hint(i, space_id, file_name_t("", false)); else { @@ -2762,10 +2758,10 @@ case FREE_PAGE: ut_ad(freed.emplace(id).second); /* the next record must not be same_page */ - last_offset= 1; + if (storing != BACKUP) last_offset= 1; goto free_or_init_page; case INIT_PAGE: - last_offset= FIL_PAGE_TYPE; + if (storing != BACKUP) last_offset= FIL_PAGE_TYPE; free_or_init_page: if (UNIV_UNLIKELY(rlen != 0)) goto record_corrupted; @@ -2797,7 +2793,8 @@ erase(r); continue; } - cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); + if (storing == YES) + cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); break; case EXTENDED: if (storing == NO) @@ -2811,7 +2808,8 @@ continue; if (UNIV_UNLIKELY(!rlen)) goto record_corrupted; - cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); + if (storing == YES || rlen == 1) + cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); if (rlen == 1 && *cl == TRIM_PAGES) { if (!srv_is_undo_tablespace(space_id) || @@ -2825,7 +2823,7 @@ truncated_undo_spaces[space_id - srv_undo_space_id_start]= { start_lsn, page_no }; /* the next record must not be same_page */ - last_offset= 1; + if (storing != BACKUP) last_offset= 1; if (undo_space_trunc) undo_space_trunc(space_id); continue; @@ -2833,7 +2831,7 @@ /* This record applies to an undo log or index page, and it may be followed by subsequent WRITE or similar records for the same page in the same mini-transaction. */ - last_offset= FIL_PAGE_TYPE; + if (storing != BACKUP) last_offset= FIL_PAGE_TYPE; break; case OPTION: /* OPTION records can be safely ignored in recovery */ @@ -2850,6 +2848,8 @@ case WRITE: case MEMMOVE: case MEMSET: + if (storing == BACKUP) + continue; if (storing == NO && UNIV_LIKELY(page_no != 0)) /* fil_space_set_recv_size_and_flags() is mandatory for storing==NO. It is only applicable to page_no == 0. 
Other than that, we can just @@ -2979,7 +2979,7 @@ l - recs + rlen))) { lsn= start_lsn; - if (lsn > log_sys.get_lsn()) + if (lsn > log_sys.get_flushed_lsn(std::memory_order_relaxed)) log_sys.set_recovered_lsn(start_lsn); l+= rlen; offset= begin.ptr - log_sys.buf; @@ -3566,13 +3566,14 @@ } else { + const lsn_t end{std::max(recv_sys.scanned_lsn, recv_sys.file_checkpoint)}; sql_print_information("InnoDB: To recover: LSN " LSN_PF "/" LSN_PF "; %zu pages", - recv_sys.lsn, recv_sys.scanned_lsn, n); + recv_sys.lsn, end, n); service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, "To recover: LSN " LSN_PF "/" LSN_PF "; %zu pages", - recv_sys.lsn, recv_sys.scanned_lsn, n); + recv_sys.lsn, end, n); } } @@ -4113,8 +4114,8 @@ {log_sys.buf + recv_sys.len, size})) { mysql_mutex_unlock(&recv_sys.mutex); - ib::error() << "Failed to read log at " << source_offset - << ": " << err; + sql_print_error("InnoDB: Failed to read log at %" PRIu64 ": %s", + source_offset, ut_strerr(err)); recv_sys.set_corrupt_log(); mysql_mutex_lock(&recv_sys.mutex); } @@ -4294,7 +4295,7 @@ break; case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE_EXPORT: - if (i->second.name.find("/#sql") != std::string::npos) { + if (i->second.name.find("/#sql") == std::string::npos) { sql_print_warning("InnoDB: Tablespace " UINT32PF " was not found at %.*s when" " restoring a (partial?) backup." @@ -4588,19 +4589,19 @@ inline void log_t::set_recovered() noexcept { ut_ad(get_flushed_lsn() == get_lsn()); - ut_ad(recv_sys.lsn == get_lsn()); - size_t offset{recv_sys.offset}; + ut_ad(recv_sys.lsn == get_flushed_lsn()); if (!is_mmap()) { const size_t bs{log_sys.write_size}, bs_1{bs - 1}; - memmove_aligned<512>(buf, buf + (offset & ~bs_1), bs); - offset&= bs_1; + memmove_aligned<512>(buf, buf + (recv_sys.offset & ~bs_1), bs); } -#ifndef _WIN32 +#ifdef HAVE_PMEM else + { + buf_size= unsigned(std::min(capacity(), buf_size_max)); mprotect(buf, size_t(file_size), PROT_READ | PROT_WRITE); + } #endif - set_buf_free(offset); } inline bool recv_sys_t::validate_checkpoint() const noexcept @@ -4674,7 +4675,7 @@ goto err_exit; } ut_ad(recv_sys.file_checkpoint); - ut_ad(log_sys.get_lsn() >= recv_sys.scanned_lsn); + ut_ad(log_sys.get_flushed_lsn() >= recv_sys.scanned_lsn); if (rewind) { recv_sys.lsn = log_sys.next_checkpoint_lsn; recv_sys.offset = 0; @@ -4736,7 +4737,7 @@ tablespaces (not individual pages), while retaining the initial recv_sys.pages. */ mysql_mutex_lock(&recv_sys.mutex); - ut_ad(log_sys.get_lsn() >= recv_sys.lsn); + ut_ad(log_sys.get_flushed_lsn() >= recv_sys.lsn); recv_sys.clear(); recv_sys.lsn = log_sys.next_checkpoint_lsn; mysql_mutex_unlock(&recv_sys.mutex); @@ -4744,7 +4745,8 @@ if (srv_operation <= SRV_OPERATION_EXPORT_RESTORED) { mysql_mutex_lock(&recv_sys.mutex); - deferred_spaces.deferred_dblwr(log_sys.get_lsn()); + deferred_spaces.deferred_dblwr( + log_sys.get_flushed_lsn()); buf_dblwr.recover(); mysql_mutex_unlock(&recv_sys.mutex); } @@ -4777,16 +4779,6 @@ if (!srv_read_only_mode && log_sys.is_latest()) { log_sys.set_recovered(); - if (recv_needed_recovery - && srv_operation <= SRV_OPERATION_EXPORT_RESTORED - && recv_sys.lsn - log_sys.next_checkpoint_lsn - < log_sys.log_capacity) { - /* Write a FILE_CHECKPOINT marker as the first thing, - before generating any other redo log. This ensures - that subsequent crash recovery will be possible even - if the server were killed soon after this. 
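The log_t::set_recovered() hunk above relocates the last, partially filled block of the recovered log to the start of the write buffer. A minimal sketch of just that relocation arithmetic, assuming only that the block size is a power of two (the function and variable names here are illustrative, not the actual log_t members; the real code uses memmove_aligned<512>() because both addresses are block-aligned):

  #include <cstring>
  #include <cstddef>

  // buf: the log write buffer; recv_offset: byte offset of the recovery
  // end point within buf; bs: the write block size (e.g. 512 or 4096)
  void relocate_log_tail(unsigned char *buf, size_t recv_offset, size_t bs)
  {
    const size_t bs_1 = bs - 1;
    // move the block that contains the recovery end point to offset 0
    std::memmove(buf, buf + (recv_offset & ~bs_1), bs);
    // appending resumes at this offset within the relocated block
    const size_t resume = recv_offset & bs_1;
    (void) resume;
  }
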
*/ - fil_names_clear(log_sys.next_checkpoint_lsn); - } } DBUG_EXECUTE_IF("before_final_redo_apply", goto err_exit;); @@ -4892,28 +4884,43 @@ goto check_if_corrupted; } -byte *recv_dblwr_t::find_encrypted_page(const fil_node_t &node, - uint32_t page_no, - byte *buf) noexcept +ATTRIBUTE_COLD +byte *recv_dblwr_t::find_deferred_page(const fil_node_t &node, + uint32_t page_no, + byte *buf) noexcept { - ut_ad(node.space->crypt_data); ut_ad(node.space->full_crc32()); mysql_mutex_lock(&recv_sys.mutex); byte *result_page= nullptr; + bool is_encrypted= node.space->crypt_data && + node.space->crypt_data->is_encrypted(); for (list::iterator page_it= pages.begin(); page_it != pages.end(); page_it++) { if (page_get_page_no(*page_it) != page_no || buf_page_is_corrupted(true, *page_it, node.space->flags)) continue; + + if (is_encrypted && + !mach_read_from_4(*page_it + FIL_PAGE_FCRC32_KEY_VERSION)) + continue; + memcpy(buf, *page_it, node.space->physical_size()); buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve(false); ut_a(slot); slot->allocate(); - bool invalidate= - !fil_space_decrypt(node.space, slot->crypt_buf, buf) || - (node.space->is_compressed() && - !fil_page_decompress(slot->crypt_buf, buf, node.space->flags)); + + bool invalidate= false; + if (is_encrypted) + { + invalidate= !fil_space_decrypt(node.space, slot->crypt_buf, buf); + if (!invalidate && node.space->is_compressed()) + goto decompress; + } + else +decompress: + invalidate= !fil_page_decompress(slot->crypt_buf, buf, + node.space->flags); slot->release(); if (invalidate || diff -Nru mariadb-10.11.11/storage/innobase/mtr/mtr0mtr.cc mariadb-10.11.13/storage/innobase/mtr/mtr0mtr.cc --- mariadb-10.11.11/storage/innobase/mtr/mtr0mtr.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/mtr/mtr0mtr.cc 2025-05-19 16:14:25.000000000 +0000 @@ -44,7 +44,6 @@ #endif std::pair (*mtr_t::finisher)(mtr_t *, size_t); -unsigned mtr_t::spin_wait_delay; void mtr_t::finisher_update() { @@ -53,15 +52,12 @@ if (log_sys.is_mmap()) { commit_logger= mtr_t::commit_log; - finisher= spin_wait_delay - ? mtr_t::finish_writer : mtr_t::finish_writer; + finisher= mtr_t::finish_writer; return; } commit_logger= mtr_t::commit_log; #endif - finisher= - (spin_wait_delay - ? mtr_t::finish_writer : mtr_t::finish_writer); + finisher= mtr_t::finish_writer; } void mtr_memo_slot_t::release() const @@ -169,7 +165,7 @@ else flush_list_bytes+= block->physical_size(); - ut_ad(flush_list_bytes <= curr_pool_size); + ut_ad(flush_list_bytes <= size_in_bytes); if (prev) UT_LIST_INSERT_AFTER(flush_list, prev, &block->page); @@ -257,7 +253,7 @@ { if (block->page.oldest_modification() <= 1) { - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); /* For unlogged mtrs (MTR_LOG_NO_REDO), we use the current system LSN. The mtr that generated the LSN is either already committed or in mtr_t::commit. 
Shared latch and relaxed atomics should be fine here as it is guaranteed @@ -269,7 +265,7 @@ mysql_mutex_lock(&buf_pool.flush_list_mutex); buf_pool.insert_into_flush_list (buf_pool.prepare_insert_into_flush_list(lsn), block, lsn); - log_sys.latch.rd_unlock(); + log_sys.latch.wr_unlock(); mysql_mutex_unlock(&buf_pool.flush_list_mutex); } } @@ -339,24 +335,11 @@ m_memo.clear(); } -inline lsn_t log_t::get_write_target() const -{ - ut_ad(latch_have_any()); - if (UNIV_LIKELY(buf_free_ok())) - return 0; - /* The LSN corresponding to the end of buf is - write_lsn - (first_lsn & 4095) + buf_free, - but we use simpler arithmetics to return a smaller write target in - order to minimize waiting in log_write_up_to(). */ - ut_ad(max_buf_free >= 4096 * 4); - return write_lsn + max_buf_free / 2; -} - template void mtr_t::commit_log(mtr_t *mtr, std::pair lsns) + noexcept { size_t modified= 0; - const lsn_t write_lsn= mmap ? 0 : log_sys.get_write_target(); if (mtr->m_made_dirty) { @@ -475,9 +458,6 @@ if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO)) buf_flush_ahead(mtr->m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC); - - if (!mmap && UNIV_UNLIKELY(write_lsn != 0)) - log_write_up_to(write_lsn, false); } /** Commit a mini-transaction. */ @@ -690,7 +670,7 @@ /* We will not encrypt any FILE_ records, but we will reserve a nonce at the end. */ size+= 8; - m_commit_lsn= log_sys.get_lsn(); + m_commit_lsn= log_sys.get_flushed_lsn(); } else m_commit_lsn= 0; @@ -775,7 +755,7 @@ /* We will not encrypt any FILE_ records, but we will reserve a nonce at the end. */ size+= 8; - m_commit_lsn= log_sys.get_lsn(); + m_commit_lsn= log_sys.get_flushed_lsn(); } else m_commit_lsn= 0; @@ -897,181 +877,109 @@ ? ". Shutdown is in progress" : ""); } -static ATTRIBUTE_NOINLINE void lsn_delay(size_t delay, size_t mult) noexcept +ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept { - delay*= mult * 2; // GCC 13.2.0 -O2 targeting AMD64 wants to unroll twice - HMT_low(); - do - MY_RELAX_CPU(); - while (--delay); - HMT_medium(); -} - -#if defined __clang_major__ && __clang_major__ < 10 -/* Only clang-10 introduced support for asm goto */ -#elif defined __APPLE__ -/* At least some versions of Apple Xcode do not support asm goto */ -#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) -# if SIZEOF_SIZE_T == 8 -# define LOCK_TSET \ - __asm__ goto("lock btsq $63, %0\n\t" "jnc %l1" \ - : : "m"(buf_free) : "cc", "memory" : got) -# else -# define LOCK_TSET \ - __asm__ goto("lock btsl $31, %0\n\t" "jnc %l1" \ - : : "m"(buf_free) : "cc", "memory" : got) -# endif -#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64) -# if SIZEOF_SIZE_T == 8 -# define LOCK_TSET \ - if (!_interlockedbittestandset64 \ - (reinterpret_cast(&buf_free), 63)) return -# else -# define LOCK_TSET \ - if (!_interlockedbittestandset \ - (reinterpret_cast(&buf_free), 31)) return -# endif -#endif - -#ifdef LOCK_TSET -ATTRIBUTE_NOINLINE -void log_t::lsn_lock_bts() noexcept -{ - LOCK_TSET; - { - const size_t m= mtr_t::spin_wait_delay; - constexpr size_t DELAY= 10, MAX_ITERATIONS= 10; - for (size_t delay_count= DELAY, delay_iterations= 1;; - lsn_delay(delay_iterations, m)) + if (UNIV_LIKELY(!ex)) + { + latch.rd_unlock(); + if (!late) { - if (!(buf_free.load(std::memory_order_relaxed) & buf_free_LOCK)) - LOCK_TSET; - if (!delay_count); - else if (delay_iterations < MAX_ITERATIONS) - delay_count= DELAY, delay_iterations++; - else - delay_count--; + /* Wait for all threads to back off. 
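The waiting policy visible in this append_prepare_wait() hunk is: spin for a bounded number of rounds while the WRITE_BACKOFF flag remains set, pausing between probes, and fall back to a 100-microsecond sleep when spinning has not helped. A self-contained sketch of the same shape, where a std::atomic word stands in for write_lsn_offset and an empty delay loop stands in for MY_RELAX_CPU():

  #include <atomic>
  #include <chrono>
  #include <cstdint>
  #include <thread>

  constexpr uint64_t BACKOFF_FLAG = uint64_t{1} << 62; // illustrative bit

  // Spin while the flag is set, with `delay` relax iterations per probe;
  // after `rounds` unsuccessful probes, sleep and start a new spin round.
  void wait_for_backoff_clear(std::atomic<uint64_t> &word,
                              unsigned rounds, unsigned delay)
  {
    for (;;)
    {
      for (unsigned r = rounds; r--; )
      {
        if (!(word.load(std::memory_order_relaxed) & BACKOFF_FLAG))
          return;                   // a writer cleared the flag
        for (unsigned d = delay; d--; )
          ;                         // stand-in for MY_RELAX_CPU()
      }
      std::this_thread::sleep_for(std::chrono::microseconds(100));
    }
  }
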
*/ + latch.wr_lock(SRW_LOCK_CALL); + goto got_ex; } - } -# ifdef __GNUC__ - got: - return; -# endif -} + const auto delay= my_cpu_relax_multiplier / 4 * srv_spin_wait_delay; + const auto rounds= srv_n_spin_wait_rounds; -inline -#else -ATTRIBUTE_NOINLINE -#endif -size_t log_t::lock_lsn() noexcept -{ -#ifdef LOCK_TSET - lsn_lock_bts(); - return ~buf_free_LOCK & buf_free.load(std::memory_order_relaxed); -# undef LOCK_TSET -#else - size_t b= buf_free.fetch_or(buf_free_LOCK, std::memory_order_acquire); - if (b & buf_free_LOCK) - { - const size_t m= mtr_t::spin_wait_delay; - constexpr size_t DELAY= 10, MAX_ITERATIONS= 10; - for (size_t delay_count= DELAY, delay_iterations= 1; - ((b= buf_free.load(std::memory_order_relaxed)) & buf_free_LOCK) || - (buf_free_LOCK & (b= buf_free.fetch_or(buf_free_LOCK, - std::memory_order_acquire))); - lsn_delay(delay_iterations, m)) - if (!delay_count); - else if (delay_iterations < MAX_ITERATIONS) - delay_count= DELAY, delay_iterations++; - else - delay_count--; + for (;;) + { + HMT_low(); + for (auto r= rounds + 1; r--; ) + { + if (write_lsn_offset.load(std::memory_order_relaxed) & WRITE_BACKOFF) + { + for (auto d= delay; d--; ) + MY_RELAX_CPU(); + } + else + { + HMT_medium(); + goto done; + } + } + HMT_medium(); + std::this_thread::sleep_for(std::chrono::microseconds(100)); + } } - return b; -#endif -} - -template -ATTRIBUTE_COLD size_t log_t::append_prepare_wait(size_t b, bool ex, lsn_t lsn) - noexcept -{ - waits++; - ut_ad(buf_free.load(std::memory_order_relaxed) == - (spin ? (b | buf_free_LOCK) : b)); - if (spin) - buf_free.store(b, std::memory_order_release); else - lsn_lock.wr_unlock(); - - if (ex) + { + got_ex: + const uint64_t l= write_lsn_offset.load(std::memory_order_relaxed); + const lsn_t lsn= base_lsn.load(std::memory_order_relaxed) + + (l & (WRITE_BACKOFF - 1)); + waits++; +#ifdef HAVE_PMEM + const bool is_pmem{is_mmap()}; + if (is_pmem) + { + ut_ad(lsn - get_flushed_lsn(std::memory_order_relaxed) < capacity()); + persist(lsn); + } +#endif latch.wr_unlock(); - else - latch.rd_unlock(); - - log_write_up_to(lsn, is_mmap()); - - if (ex) - latch.wr_lock(SRW_LOCK_CALL); - else - latch.rd_lock(SRW_LOCK_CALL); - - if (spin) - return lock_lsn(); + /* write_buf() or persist() will clear the WRITE_BACKOFF flag, + which our caller will recheck. */ +#ifdef HAVE_PMEM + if (!is_pmem) +#endif + log_write_up_to(lsn, false); + if (ex) + { + latch.wr_lock(SRW_LOCK_CALL); + return; + } + } - lsn_lock.wr_lock(); - return buf_free.load(std::memory_order_relaxed); +done: + latch.rd_lock(SRW_LOCK_CALL); } /** Reserve space in the log buffer for appending data. -@tparam spin whether to use the spin-only lock_lsn() @tparam mmap log_sys.is_mmap() @param size total length of the data to append(), in bytes @param ex whether log_sys.latch is exclusively locked @return the start LSN and the buffer position for append() */ -template +template inline std::pair log_t::append_prepare(size_t size, bool ex) noexcept { ut_ad(ex ? latch_have_wr() : latch_have_rd()); ut_ad(mmap == is_mmap()); - if (!spin) - lsn_lock.wr_lock(); - size_t b{spin ? lock_lsn() : buf_free.load(std::memory_order_relaxed)}; - write_to_buf++; - - lsn_t l{lsn.load(std::memory_order_relaxed)}, end_lsn{l + size}; - - if (UNIV_UNLIKELY(mmap - ? (end_lsn - - get_flushed_lsn(std::memory_order_relaxed)) > capacity() - : b + size >= buf_size)) - { - b= append_prepare_wait(b, ex, l); - /* While flushing log, we had released the lsn lock and LSN could have - progressed in the meantime. 
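For contrast with the removed lock_lsn() above, which serialized writers through a lock bit claimed with fetch_or, the replacement scheme in the next hunk claims log-buffer bytes with a single fetch_add and only waits when the claimed offset overshoots the buffer. A simplified sketch of the two reservation styles (names are illustrative, and the overshoot/backoff handling of the real append_prepare() is omitted):

  #include <atomic>
  #include <cstdint>

  std::atomic<uint64_t> word;                 // reserved offset in low bits
  constexpr uint64_t LOCK_BIT = uint64_t{1} << 63;

  // Old style: take a bit lock, then update the offset while holding it.
  uint64_t reserve_locked(uint64_t size)
  {
    while (word.fetch_or(LOCK_BIT, std::memory_order_acquire) & LOCK_BIT)
      /* spin, with backoff in the real code */;
    const uint64_t start = word.load(std::memory_order_relaxed) & ~LOCK_BIT;
    word.store(start + size, std::memory_order_release); // also drops the lock
    return start;
  }

  // New style: one wait-free fetch_add claims the bytes for this writer.
  uint64_t reserve_wait_free(uint64_t size)
  {
    return word.fetch_add(size, std::memory_order_relaxed);
  }
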
*/ - l= lsn.load(std::memory_order_relaxed); - end_lsn= l + size; - } - - size_t new_buf_free= b + size; - if (mmap && new_buf_free >= file_size) - new_buf_free-= size_t(capacity()); + ut_ad(!mmap || buf_size == std::min(capacity(), buf_size_max)); + const size_t buf_size{this->buf_size - size}; + uint64_t l; + static_assert(WRITE_TO_BUF == WRITE_BACKOFF << 1, ""); + while (UNIV_UNLIKELY((l= write_lsn_offset.fetch_add(size + WRITE_TO_BUF) & + (WRITE_TO_BUF - 1)) >= buf_size)) + { + /* The following is inlined here instead of being part of + append_prepare_wait(), in order to increase the locality of reference + and to set the WRITE_BACKOFF flag as soon as possible. */ + bool late(write_lsn_offset.fetch_or(WRITE_BACKOFF) & WRITE_BACKOFF); + /* Subtract our LSN overshoot. */ + write_lsn_offset.fetch_sub(size); + append_prepare_wait(late, ex); + } - lsn.store(end_lsn, std::memory_order_relaxed); + const lsn_t lsn{l + base_lsn.load(std::memory_order_relaxed)}, + end_lsn{lsn + size}; if (UNIV_UNLIKELY(end_lsn >= last_checkpoint_lsn + log_capacity)) set_check_for_checkpoint(true); - byte *our_buf= buf; - if (spin) - buf_free.store(new_buf_free, std::memory_order_release); - else - { - buf_free.store(new_buf_free, std::memory_order_relaxed); - lsn_lock.wr_unlock(); - } - - return {l, our_buf + b}; + return {lsn, + buf + size_t(mmap ? FIRST_LSN + (lsn - first_lsn) % capacity() : l)}; } /** Finish appending data to the log. @@ -1216,7 +1124,7 @@ if (!resize_flush_buf) { ut_ad(is_mmap()); - lsn_lock.wr_lock(); + resize_wrap_mutex.wr_lock(); const size_t resize_capacity{resize_target - START_OFFSET}; { const lsn_t resizing{resize_in_progress()}; @@ -1227,7 +1135,7 @@ if (UNIV_UNLIKELY(lsn < resizing)) { /* This function may execute in multiple concurrent threads - that hold a shared log_sys.latch. Before we got lsn_lock, + that hold a shared log_sys.latch. Before we got resize_wrap_mutex, another thread could have executed resize_lsn.store(lsn) below with a larger lsn than ours. @@ -1277,7 +1185,7 @@ ut_ad(resize_buf[s] <= 1); resize_buf[s]= 1; mmap_done: - lsn_lock.wr_unlock(); + resize_wrap_mutex.wr_unlock(); } else #endif @@ -1304,7 +1212,7 @@ d+= size; } -template +template std::pair mtr_t::finish_writer(mtr_t *mtr, size_t len) { @@ -1315,7 +1223,7 @@ const size_t size{mtr->m_commit_lsn ? 5U + 8U : 5U}; std::pair start= - log_sys.append_prepare(len, mtr->m_latch_ex); + log_sys.append_prepare(len, mtr->m_latch_ex); if (!mmap) { diff -Nru mariadb-10.11.11/storage/innobase/os/os0file.cc mariadb-10.11.13/storage/innobase/os/os0file.cc --- mariadb-10.11.11/storage/innobase/os/os0file.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/os/os0file.cc 2025-05-19 16:14:25.000000000 +0000 @@ -2314,8 +2314,20 @@ ut_ad(exists); #endif /* UNIV_DEBUG */ - if (MoveFileEx(oldpath, newpath, MOVEFILE_REPLACE_EXISTING)) { - return(true); + for (int retry= 50;; retry--){ + if (MoveFileEx(oldpath, newpath, MOVEFILE_REPLACE_EXISTING)) + return true; + + if (!retry) + break; + + if (GetLastError() != ERROR_SHARING_VIOLATION) + break; + + // oldpath was opened by someone else (antivirus?) + // without the FILE_SHARE_DELETE flag. Retry the operation. + + Sleep(10); } os_file_handle_rename_error(oldpath, newpath); @@ -3357,6 +3369,12 @@ return pending; } +/** @return approximate number of pending writes */ +size_t os_aio_pending_writes_approx() noexcept +{ + return write_slots->pending_io_count(); +} + /** Wait until all pending asynchronous reads have completed. 
@param declare whether the wait will be declared in tpool */ void os_aio_wait_until_no_pending_reads(bool declare) noexcept diff -Nru mariadb-10.11.11/storage/innobase/pars/pars0pars.cc mariadb-10.11.13/storage/innobase/pars/pars0pars.cc --- mariadb-10.11.11/storage/innobase/pars/pars0pars.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/pars/pars0pars.cc 2025-05-19 16:14:25.000000000 +0000 @@ -783,11 +783,6 @@ { ulint count = 0; - if (sym_node == NULL) { - - return(count); - } - while (sym_node) { pars_retrieve_table_def(sym_node); diff -Nru mariadb-10.11.11/storage/innobase/row/row0ins.cc mariadb-10.11.13/storage/innobase/row/row0ins.cc --- mariadb-10.11.11/storage/innobase/row/row0ins.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0ins.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1955,7 +1955,7 @@ TRUE, foreign, table, ref_tuple, thr); if (ref_table) { - dict_table_close(ref_table); + ref_table->release(); } } } @@ -2580,12 +2580,44 @@ } } -#if defined __aarch64__&&defined __GNUC__&&__GNUC__==4&&!defined __clang__ -/* Avoid GCC 4.8.5 internal compiler error due to srw_mutex::wr_unlock(). -We would only need this for row_ins_clust_index_entry_low(), -but GCC 4.8.5 does not support pop_options. */ -# pragma GCC optimize ("O0") -#endif +/** Parse the integer data from specified data, which could be +DATA_INT, DATA_FLOAT or DATA_DOUBLE. If the value is less than 0 +and the type is not unsigned then we reset the value to 0 +@param data data to read +@param len length of data +@param mtype main type of the column +@param prtype precise type of the column +@return the integer value from the data +@retval 0 if the value is negative or the type or length invalid */ +static uint64_t row_parse_int(const byte *data, size_t len, + ulint mtype, ulint prtype) noexcept +{ + switch (mtype) { + case DATA_FLOAT: + if (len != sizeof(float)) + return 0; + { + float f= mach_float_read(data); + return f <= 0.0 ? 0 : uint64_t(f); + } + case DATA_DOUBLE: + if (len != sizeof(double)) + return 0; + { + double d= mach_double_read(data); + return d <= 0.0 ? 0 : uint64_t(d); + } + case DATA_INT: + if (len == 0 || len > 8) + return 0; + const ibool unsigned_type{prtype & DATA_UNSIGNED}; + uint64_t value= mach_read_int_type(data, len, unsigned_type); + return !unsigned_type && int64_t(value) < 0 ? 0 : value; + } + + ut_ad("invalid type" == 0); + return 0; +} /***************************************************************//** Tries to insert an entry into a clustered index, ignoring foreign key @@ -2672,8 +2704,7 @@ dfield->data), dfield->len, dfield->type.mtype, - dfield->type.prtype - & DATA_UNSIGNED); + dfield->type.prtype); if (auto_inc && mode != BTR_MODIFY_TREE) { mode = btr_latch_mode( @@ -2722,6 +2753,12 @@ DBUG_EXECUTE_IF("row_ins_row_level", goto row_level_insert;); +#ifdef WITH_WSREP + /* Appliers never execute bulk insert statements directly. 
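Stepping back to the new row_parse_int() above: its contract is easiest to see from a few inputs, since everything that cannot seed an AUTO_INCREMENT counter collapses to 0 (a hedged reading of the code shown in the hunk, not an exhaustive specification):

  // DATA_DOUBLE, value -3.5              -> 0 (negative collapses to 0)
  // DATA_DOUBLE, len != sizeof(double)   -> 0 (length check fails)
  // DATA_INT, len 4, signed, bytes of -1 -> 0 (int64_t(value) < 0)
  // DATA_INT, len 4, unsigned 0xFFFFFFFF -> 4294967295
  // DATA_INT, len 0 or len > 8           -> 0 (invalid length)
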
*/ + if (trx->is_wsrep() && !wsrep_thd_is_local_transaction(trx->mysql_thd)) + goto row_level_insert; +#endif /* WITH_WSREP */ + if (!(flags & BTR_NO_UNDO_LOG_FLAG) && page_is_empty(block->page.frame) && !entry->is_metadata() && !trx->duplicates @@ -2738,28 +2775,24 @@ && !index->table->has_spatial_index()) { ut_ad(!index->table->skip_alter_undo); - trx->bulk_insert = true; + trx->bulk_insert = TRX_DML_BULK; err = lock_table(index->table, NULL, LOCK_X, thr); if (err != DB_SUCCESS) { trx->error_state = err; - trx->bulk_insert = false; + trx->bulk_insert = TRX_NO_BULK; goto err_exit; } if (index->table->n_rec_locks) { avoid_bulk: - trx->bulk_insert = false; + trx->bulk_insert = TRX_NO_BULK; goto row_level_insert; } #ifdef WITH_WSREP - if (trx->is_wsrep()) + if (trx->is_wsrep() && + wsrep_append_table_key(trx->mysql_thd, *index->table)) { - if (!wsrep_thd_is_local_transaction(trx->mysql_thd)) - goto row_level_insert; - if (wsrep_append_table_key(trx->mysql_thd, *index->table)) - { - trx->error_state = DB_ROLLBACK; - goto err_exit; - } + trx->error_state = DB_ROLLBACK; + goto err_exit; } #endif /* WITH_WSREP */ @@ -2811,7 +2844,7 @@ bulk buffer and doesn't check for constraint validity of foreign key relationship. */ trx_start_if_not_started(trx, true); - trx->bulk_insert = true; + trx->bulk_insert = TRX_DDL_BULK; auto m = trx->mod_tables.emplace(index->table, 0); m.first->second.start_bulk_insert(index->table, true); err = m.first->second.bulk_insert_buffered( @@ -3891,3 +3924,79 @@ return(thr); } + +/** Read the AUTOINC column from an index record +@param index index of the record +@param rec the record +@return value read from the first column +@retval 0 if the value would be NULL or negative */ +static uint64_t row_read_autoinc(const dict_index_t &index, const rec_t *rec) + noexcept +{ + const dict_field_t &field= index.fields[0]; + ut_ad(!DATA_BIG_COL(field.col)); + ut_ad(!(rec_get_info_bits(rec, index.table->not_redundant()) & + (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG))); + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_HEADER_SIZE + 2]; + rec_offs_init(offsets_); + rec_offs *offsets= rec_get_offsets(rec, &index, offsets_, + index.n_core_fields, 1, &heap); + ut_ad(!heap); + + size_t len; + ut_d(size_t first_offset=) rec_get_nth_field_offs(offsets, 0, &len); + ut_ad(!first_offset); + return row_parse_int(rec, len, field.col->mtype, field.col->prtype); +} + +/** Get the maximum and non-delete-marked record in an index. 
+@param index index B-tree +@param mtr mini-transaction (may be committed and restarted) +@return maximum record, page s-latched in mtr +@retval nullptr if there are no records, or if all of them are delete-marked */ +static +const rec_t *row_search_get_max_rec(dict_index_t *index, mtr_t *mtr) noexcept +{ + btr_pcur_t pcur; + const bool desc= index->fields[0].descending; + + /* Open at the high/right end (false), and init cursor */ + if (pcur.open_leaf(desc, index, BTR_SEARCH_LEAF, mtr) != DB_SUCCESS) + return nullptr; + + if (desc) + { + const bool comp= index->table->not_redundant(); + while (btr_pcur_move_to_next_user_rec(&pcur, mtr)) + { + const rec_t *rec= btr_pcur_get_rec(&pcur); + if (!rec_is_metadata(rec, comp) && !rec_get_deleted_flag(rec, comp)) + return rec; + } + return nullptr; + } + + do + { + const page_t *page= btr_pcur_get_page(&pcur); + const rec_t *rec= page_find_rec_last_not_deleted(page); + if (page_rec_is_user_rec_low(rec - page)) + return rec; + btr_pcur_move_before_first_on_page(&pcur); + } + while (btr_pcur_move_to_prev(&pcur, mtr)); + + return nullptr; +} + +uint64_t row_search_max_autoinc(dict_index_t *index) noexcept +{ + uint64_t value= 0; + mtr_t mtr; + mtr.start(); + if (const rec_t *rec= row_search_get_max_rec(index, &mtr)) + value= row_read_autoinc(*index, rec); + mtr.commit(); + return value; +} diff -Nru mariadb-10.11.11/storage/innobase/row/row0log.cc mariadb-10.11.13/storage/innobase/row/row0log.cc --- mariadb-10.11.11/storage/innobase/row/row0log.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0log.cc 2025-05-19 16:14:25.000000000 +0000 @@ -4065,21 +4065,20 @@ if (!(this->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { for (ulint i = 0; i < dict_table_get_n_v_cols(table); i++) - dfield_get_type( - dtuple_get_nth_v_field(row, i))->mtype = DATA_MISSING; + dfield_get_type(dtuple_get_nth_v_field(row, i))->mtype = DATA_MISSING; } + if (table->n_v_cols) + row_upd_replace_vcol(row, table, update, false, nullptr, + (cmpl_info & UPD_NODE_NO_ORD_CHANGE) + ? nullptr : undo_rec); + if (is_update) { old_row= dtuple_copy(row, heap); row_upd_replace(old_row, &old_ext, clust_index, update, heap); } - if (table->n_v_cols) - row_upd_replace_vcol(row, table, update, false, nullptr, - (cmpl_info & UPD_NODE_NO_ORD_CHANGE) - ? nullptr : undo_rec); - bool success= true; dict_index_t *index= dict_table_get_next_index(clust_index); while (index) diff -Nru mariadb-10.11.11/storage/innobase/row/row0mysql.cc mariadb-10.11.13/storage/innobase/row/row0mysql.cc --- mariadb-10.11.11/storage/innobase/row/row0mysql.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0mysql.cc 2025-05-19 16:14:25.000000000 +0000 @@ -69,7 +69,7 @@ /** Delay an INSERT, DELETE or UPDATE operation if the purge is lagging. */ -static void row_mysql_delay_if_needed() +static void row_mysql_delay_if_needed() noexcept { const auto delay= srv_dml_needed_delay; if (UNIV_UNLIKELY(delay != 0)) @@ -78,8 +78,8 @@ log_sys.latch.rd_lock(SRW_LOCK_CALL); const lsn_t last= log_sys.last_checkpoint_lsn, max_age= log_sys.max_checkpoint_age; + const lsn_t lsn= log_sys.get_flushed_lsn(); log_sys.latch.rd_unlock(); - const lsn_t lsn= log_sys.get_lsn(); if ((lsn - last) / 4 >= max_age / 5) buf_flush_ahead(last + max_age / 5, false); purge_sys.wake_if_not_active(); @@ -687,8 +687,12 @@ /* MariaDB will roll back the latest SQL statement */ break; } - /* MariaDB will roll back the entire transaction. 
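The relocated row_search_max_autoinc() above is the read side of AUTO_INCREMENT initialization; a caller such as InnoDB's own innobase_initialize_autoinc() can seed the in-memory counter from it. A hypothetical caller sketch (the member assignment and the +1 step are assumptions for illustration, not code from this diff):

  // 0 means the index is empty or holds only delete-marked records.
  void initialize_autoinc(dict_table_t *table, dict_index_t *index)
  {
    const uint64_t seed = row_search_max_autoinc(index);
    table->autoinc = seed + 1;   // assumed: next value to hand out
  }
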
*/ - trx->bulk_insert = false; + /* For DML, InnoDB does a partial rollback and clears + the bulk buffer in row_mysql_handle_errors(). + For ALTER TABLE ALGORITHM=COPY & CREATE TABLE...SELECT, + the bulk insert transaction will be rolled back inside + ha_innobase::extra(HA_EXTRA_ABORT_ALTER_COPY) */ + trx->bulk_insert &= TRX_DDL_BULK; trx->last_stmt_start = 0; break; case DB_LOCK_WAIT: @@ -981,7 +985,7 @@ rtr_clean_rtr_info(prebuilt->rtr_info, true); } if (prebuilt->table) { - dict_table_close(prebuilt->table); + prebuilt->table->release(); } mem_heap_free(prebuilt->heap); @@ -1599,7 +1603,7 @@ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED); ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); - ut_ad(table->stat_initialized); + ut_ad(table->stat_initialized()); if (!table->is_readable()) { return row_mysql_get_table_error(trx, table); } @@ -2159,11 +2163,9 @@ index = node->index; - ut_ad(!index == (err != DB_SUCCESS)); - que_graph_free((que_t*) que_node_get_parent(thr)); - if (index && (index->type & DICT_FTS)) { + if (err == DB_SUCCESS && (index->type & DICT_FTS)) { err = fts_create_index_tables(trx, index, table->id); } diff -Nru mariadb-10.11.11/storage/innobase/row/row0purge.cc mariadb-10.11.13/storage/innobase/row/row0purge.cc --- mariadb-10.11.11/storage/innobase/row/row0purge.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0purge.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1564,7 +1564,7 @@ case TRX_UNDO_DEL_MARK_REC: purged = row_purge_del_mark(node); if (purged) { - if (node->table->stat_initialized + if (node->table->stat_initialized() && srv_stats_include_delete_marked) { dict_stats_update_if_needed( node->table, *thr->graph->trx); diff -Nru mariadb-10.11.11/storage/innobase/row/row0sel.cc mariadb-10.11.13/storage/innobase/row/row0sel.cc --- mariadb-10.11.11/storage/innobase/row/row0sel.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0sel.cc 2025-05-19 16:14:25.000000000 +0000 @@ -6852,123 +6852,3 @@ goto rec_loop; } - -/*******************************************************************//** -Read the AUTOINC column from the current row. If the value is less than -0 and the type is not unsigned then we reset the value to 0. -@return value read from the column */ -static -ib_uint64_t -row_search_autoinc_read_column( -/*===========================*/ - dict_index_t* index, /*!< in: index to read from */ - const rec_t* rec, /*!< in: current rec */ - ulint col_no, /*!< in: column number */ - ulint mtype, /*!< in: column main type */ - ibool unsigned_type) /*!< in: signed or unsigned flag */ -{ - ulint len; - const byte* data; - ib_uint64_t value; - mem_heap_t* heap = NULL; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets = offsets_; - - rec_offs_init(offsets_); - ut_ad(page_rec_is_leaf(rec)); - - offsets = rec_get_offsets(rec, index, offsets, index->n_core_fields, - col_no + 1, &heap); - - if (rec_offs_nth_sql_null(offsets, col_no)) { - /* There is no non-NULL value in the auto-increment column. */ - value = 0; - goto func_exit; - } - - data = rec_get_nth_field(rec, offsets, col_no, &len); - - value = row_parse_int(data, len, mtype, unsigned_type); - -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return(value); -} - -/** Get the maximum and non-delete-marked record in an index. 
-@param[in] index index tree -@param[in,out] mtr mini-transaction (may be committed and restarted) -@return maximum record, page s-latched in mtr -@retval NULL if there are no records, or if all of them are delete-marked */ -static -const rec_t* -row_search_get_max_rec( - dict_index_t* index, - mtr_t* mtr) -{ - btr_pcur_t pcur; - const rec_t* rec; - const bool desc = index->fields[0].descending; - - if (pcur.open_leaf(desc, index, BTR_SEARCH_LEAF, mtr) != DB_SUCCESS) { - return nullptr; - } - - if (desc) { - const bool comp = index->table->not_redundant(); - while (btr_pcur_move_to_next_user_rec(&pcur, mtr)) { - rec = btr_pcur_get_rec(&pcur); - if (rec_is_metadata(rec, *index)) { - continue; - } - if (!rec_get_deleted_flag(rec, comp)) { - goto found; - } - } - } else { - do { - rec = page_find_rec_last_not_deleted( - btr_pcur_get_page(&pcur)); - if (page_rec_is_user_rec(rec)) { - goto found; - } - btr_pcur_move_before_first_on_page(&pcur); - } while (btr_pcur_move_to_prev(&pcur, mtr)); - } - - rec = nullptr; - -found: - ut_ad(!rec - || !(rec_get_info_bits(rec, dict_table_is_comp(index->table)) - & (REC_INFO_MIN_REC_FLAG | REC_INFO_DELETED_FLAG))); - return(rec); -} - -/** Read the max AUTOINC value from an index. -@param[in] index index starting with an AUTO_INCREMENT column -@return the largest AUTO_INCREMENT value -@retval 0 if no records were found */ -ib_uint64_t -row_search_max_autoinc(dict_index_t* index) -{ - const dict_field_t* dfield = dict_index_get_nth_field(index, 0); - - ib_uint64_t value = 0; - - mtr_t mtr; - mtr.start(); - - if (const rec_t* rec = row_search_get_max_rec(index, &mtr)) { - value = row_search_autoinc_read_column( - index, rec, 0, - dfield->col->mtype, - dfield->col->prtype & DATA_UNSIGNED); - } - - mtr.commit(); - return(value); -} diff -Nru mariadb-10.11.11/storage/innobase/row/row0uins.cc mariadb-10.11.13/storage/innobase/row/row0uins.cc --- mariadb-10.11.11/storage/innobase/row/row0uins.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0uins.cc 2025-05-19 16:14:25.000000000 +0000 @@ -244,8 +244,7 @@ btr_pcur_commit_specify_mtr(&node->pcur, &mtr); if (UNIV_LIKELY_NULL(table)) { - dict_table_close(table, dict_locked, - node->trx->mysql_thd, mdl_ticket); + dict_table_close(table, node->trx->mysql_thd, mdl_ticket); } return(err); @@ -452,7 +451,7 @@ would probably be better to just drop all temporary tables (and temporary undo log records) of the current connection, instead of doing this rollback. */ - dict_table_close(node->table, dict_locked); + node->table->release(); node->table = NULL; return false; } else { @@ -614,7 +613,7 @@ err = row_undo_ins_remove_clust_rec(node); } - if (err == DB_SUCCESS && node->table->stat_initialized) { + if (err == DB_SUCCESS && node->table->stat_initialized()) { /* Not protected by dict_sys.latch or table->stats_mutex_lock() for performance reasons, we would rather get garbage @@ -644,8 +643,7 @@ break; } - dict_table_close(node->table, dict_locked); - + node->table->release(); node->table = NULL; return(err); diff -Nru mariadb-10.11.11/storage/innobase/row/row0umod.cc mariadb-10.11.13/storage/innobase/row/row0umod.cc --- mariadb-10.11.11/storage/innobase/row/row0umod.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0umod.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1259,7 +1259,7 @@ would probably be better to just drop all temporary tables (and temporary undo log records) of the current connection, instead of doing this rollback. 
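Several hunks in this region replace dict_table_close() with table->release() at call sites that hold no MDL ticket and need no THD bookkeeping; what remains there is a plain reference-count decrement. A minimal model of the distinction (type and member names assumed for illustration):

  #include <atomic>
  #include <cassert>

  struct table_model
  {
    std::atomic<unsigned> n_ref_count{0};
    void acquire() { n_ref_count.fetch_add(1, std::memory_order_relaxed); }
    void release()                      // what the undo paths now call
    {
      const unsigned old =
        n_ref_count.fetch_sub(1, std::memory_order_relaxed);
      assert(old > 0);
    }
  };
  // dict_table_close(table, thd, mdl), by contrast, still releases the
  // MDL ticket through the THD's MDL_context in addition to the reference.
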
*/ - dict_table_close(node->table, dict_locked); + node->table->release(); node->table = NULL; return false; } @@ -1388,7 +1388,7 @@ bool update_statistics = !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); - if (err == DB_SUCCESS && node->table->stat_initialized) { + if (err == DB_SUCCESS && node->table->stat_initialized()) { switch (node->rec_type) { case TRX_UNDO_UPD_EXIST_REC: break; @@ -1418,8 +1418,7 @@ } } - dict_table_close(node->table, dict_locked); - + node->table->release(); node->table = NULL; return(err); diff -Nru mariadb-10.11.11/storage/innobase/row/row0upd.cc mariadb-10.11.13/storage/innobase/row/row0upd.cc --- mariadb-10.11.11/storage/innobase/row/row0upd.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/row/row0upd.cc 2025-05-19 16:14:25.000000000 +0000 @@ -253,7 +253,7 @@ FALSE, foreign, table, entry, thr); if (ref_table) { - dict_table_close(ref_table); + ref_table->release(); } if (err != DB_SUCCESS) { @@ -338,7 +338,7 @@ TRUE, foreign, table, entry, thr); if (opened) { - dict_table_close(opened); + opened->release(); } if (err != DB_SUCCESS) { diff -Nru mariadb-10.11.11/storage/innobase/srv/srv0mon.cc mariadb-10.11.13/storage/innobase/srv/srv0mon.cc --- mariadb-10.11.11/storage/innobase/srv/srv0mon.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/srv/srv0mon.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1366,12 +1366,13 @@ /* innodb_buffer_pool_pages_total */ case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL: - value = buf_pool.get_n_pages(); + case MONITOR_OVLD_BUFFER_POOL_SIZE: + value = buf_pool.curr_size(); break; /* innodb_buffer_pool_pages_misc */ case MONITOR_OVLD_BUF_POOL_PAGE_MISC: - value = buf_pool.get_n_pages() + value = buf_pool.curr_size() - UT_LIST_GET_LEN(buf_pool.LRU) - UT_LIST_GET_LEN(buf_pool.free); break; @@ -1453,7 +1454,7 @@ /* innodb_os_log_written */ case MONITOR_OVLD_OS_LOG_WRITTEN: - value = log_sys.get_lsn() - recv_sys.lsn; + value = log_get_lsn() - recv_sys.lsn; break; /* innodb_log_waits */ @@ -1490,10 +1491,6 @@ value = srv_page_size; break; - case MONITOR_OVLD_BUFFER_POOL_SIZE: - value = srv_buf_pool_size; - break; - /* innodb_row_lock_current_waits */ case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT: // dirty read without lock_sys.wait_mutex @@ -1590,7 +1587,7 @@ break; case MONITOR_OVLD_LSN_CURRENT: - value = log_sys.get_lsn(); + value = log_get_lsn(); break; case MONITOR_OVLD_CHECKPOINTS: @@ -1598,10 +1595,10 @@ break; case MONITOR_LSN_CHECKPOINT_AGE: - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); value = static_cast(log_sys.get_lsn() - log_sys.last_checkpoint_lsn); - log_sys.latch.rd_unlock(); + log_sys.latch.wr_unlock(); break; case MONITOR_OVLD_BUF_OLDEST_LSN: diff -Nru mariadb-10.11.11/storage/innobase/srv/srv0srv.cc mariadb-10.11.13/storage/innobase/srv/srv0srv.cc --- mariadb-10.11.11/storage/innobase/srv/srv0srv.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/srv/srv0srv.cc 2025-05-19 16:14:25.000000000 +0000 @@ -178,16 +178,6 @@ with mysql_mutex_lock(), which will wait until it gets the mutex. 
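One pattern worth noting in the srv0mon.cc hunk above: MONITOR_LSN_CHECKPOINT_AGE reads log_sys.get_lsn() and log_sys.last_checkpoint_lsn under a single latch acquisition (now exclusive, presumably because the current LSN is only stable against concurrent appenders under the exclusive latch in the new write_lsn_offset scheme). A simplified model of why the two loads must be paired:

  #include <cstdint>
  #include <mutex>

  using lsn_t = uint64_t;

  struct log_model
  {
    std::mutex latch;               // stands in for log_sys.latch
    lsn_t lsn = 0;                  // current LSN
    lsn_t last_checkpoint_lsn = 0;

    lsn_t checkpoint_age()
    {
      // Both loads under one acquisition: a checkpoint cannot complete
      // between them, so the unsigned subtraction cannot wrap around.
      std::lock_guard<std::mutex> g{latch};
      return lsn - last_checkpoint_lsn;
    }
  };
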
*/ #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) -/** copy of innodb_buffer_pool_size */ -ulint srv_buf_pool_size; -/** Requested buffer pool chunk size */ -size_t srv_buf_pool_chunk_unit; -/** Previously requested size */ -ulint srv_buf_pool_old_size; -/** Current size as scaling factor for the other components */ -ulint srv_buf_pool_base_size; -/** Current size in bytes */ -ulint srv_buf_pool_curr_size; /** Dump this % of each buffer pool during BP dump */ ulong srv_buf_pool_dump_pct; /** Abort load after this amount of pages */ @@ -291,13 +281,13 @@ in the innodb database. * quick transient stats, that are used if persistent stats for the given table/index are not found in the innodb database */ -unsigned long long srv_stats_transient_sample_pages; +uint32_t srv_stats_transient_sample_pages; /** innodb_stats_persistent */ my_bool srv_stats_persistent; /** innodb_stats_include_delete_marked */ my_bool srv_stats_include_delete_marked; /** innodb_stats_persistent_sample_pages */ -unsigned long long srv_stats_persistent_sample_pages; +uint32_t srv_stats_persistent_sample_pages; /** innodb_stats_auto_recalc */ my_bool srv_stats_auto_recalc; @@ -901,6 +891,7 @@ export_vars.innodb_buffer_pool_read_requests = buf_pool.stat.n_page_gets; + mysql_mutex_lock(&buf_pool.mutex); export_vars.innodb_buffer_pool_bytes_data = buf_pool.stat.LRU_bytes + (UT_LIST_GET_LEN(buf_pool.unzip_LRU) @@ -910,12 +901,21 @@ export_vars.innodb_buffer_pool_pages_latched = buf_get_latched_pages_number(); #endif /* UNIV_DEBUG */ - export_vars.innodb_buffer_pool_pages_total = buf_pool.get_n_pages(); + export_vars.innodb_buffer_pool_pages_total = buf_pool.curr_size(); export_vars.innodb_buffer_pool_pages_misc = - buf_pool.get_n_pages() + export_vars.innodb_buffer_pool_pages_total - UT_LIST_GET_LEN(buf_pool.LRU) - UT_LIST_GET_LEN(buf_pool.free); + if (size_t shrinking = buf_pool.is_shrinking()) { + snprintf(export_vars.innodb_buffer_pool_resize_status, + sizeof export_vars.innodb_buffer_pool_resize_status, + "Withdrawing blocks. 
(%zu/%zu).", + buf_pool.to_withdraw(), shrinking); + } else { + export_vars.innodb_buffer_pool_resize_status[0] = '\0'; + } + mysql_mutex_unlock(&buf_pool.mutex); export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id(); export_vars.innodb_history_list_length = trx_sys.history_size_approx(); @@ -979,13 +979,13 @@ mysql_mutex_unlock(&srv_innodb_monitor_mutex); - log_sys.latch.rd_lock(SRW_LOCK_CALL); + log_sys.latch.wr_lock(SRW_LOCK_CALL); export_vars.innodb_lsn_current = log_sys.get_lsn(); export_vars.innodb_lsn_flushed = log_sys.get_flushed_lsn(); export_vars.innodb_lsn_last_checkpoint = log_sys.last_checkpoint_lsn; export_vars.innodb_checkpoint_max_age = static_cast( log_sys.max_checkpoint_age); - log_sys.latch.rd_unlock(); + log_sys.latch.wr_unlock(); export_vars.innodb_os_log_written = export_vars.innodb_lsn_current - recv_sys.lsn; @@ -1072,7 +1072,7 @@ /* Try to track a strange bug reported by Harald Fuchs and others, where the lsn seems to decrease at times */ - lsn_t new_lsn = log_sys.get_lsn(); + lsn_t new_lsn = log_get_lsn(); ut_a(new_lsn >= old_lsn); old_lsn = new_lsn; @@ -1088,6 +1088,7 @@ now -= start; ulong waited = static_cast(now / 1000000); if (waited >= threshold) { + buf_pool.print_flush_info(); ib::fatal() << dict_sys.fatal_msg; } diff -Nru mariadb-10.11.11/storage/innobase/srv/srv0start.cc mariadb-10.11.13/storage/innobase/srv/srv0start.cc --- mariadb-10.11.11/storage/innobase/srv/srv0start.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/srv/srv0start.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1057,7 +1057,7 @@ /** Prepare to delete the redo log file. Flush the dirty pages from all the buffer pools. Flush the redo log buffer to the redo log file. @return lsn upto which data pages have been flushed. */ -static lsn_t srv_prepare_to_delete_redo_log_file() +static lsn_t srv_prepare_to_delete_redo_log_file() noexcept { DBUG_ENTER("srv_prepare_to_delete_redo_log_file"); @@ -1071,7 +1071,7 @@ log_sys.latch.wr_lock(SRW_LOCK_CALL); const bool latest_format{log_sys.is_latest()}; - lsn_t flushed_lsn{log_sys.get_lsn()}; + lsn_t flushed_lsn{log_sys.get_flushed_lsn(std::memory_order_relaxed)}; if (latest_format && !(log_sys.file_size & 4095) && flushed_lsn != log_sys.next_checkpoint_lsn + @@ -1079,6 +1079,11 @@ ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT)) { +#ifdef HAVE_PMEM + if (!log_sys.is_opened()) + log_sys.buf_size= unsigned(std::min(log_sys.capacity(), + log_sys.buf_size_max)); +#endif fil_names_clear(flushed_lsn); flushed_lsn= log_sys.get_lsn(); } @@ -1119,7 +1124,7 @@ if (latest_format) log_write_up_to(flushed_lsn, false); - ut_ad(flushed_lsn == log_sys.get_lsn()); + ut_ad(flushed_lsn == log_get_lsn()); ut_ad(!os_aio_pending_reads()); ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex)); ut_ad(!buf_pool.get_oldest_modification(0)); @@ -1134,6 +1139,18 @@ nullptr, &rollback_all_recovered_group); +inline lsn_t log_t::init_lsn() noexcept +{ + latch.wr_lock(SRW_LOCK_CALL); + ut_ad(!write_lsn_offset); + write_lsn_offset= 0; + const lsn_t lsn{base_lsn.load(std::memory_order_relaxed)}; + flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); + write_lsn= lsn; + latch.wr_unlock(); + return lsn; +} + /** Start InnoDB. @param[in] create_new_db whether to create a new database @return DB_SUCCESS or error code */ @@ -1288,34 +1305,13 @@ fil_system.create(srv_file_per_table ? 
50000 : 5000); - ib::info() << "Initializing buffer pool, total size = " - << ib::bytes_iec{srv_buf_pool_size} - << ", chunk size = " << ib::bytes_iec{srv_buf_pool_chunk_unit}; - if (buf_pool.create()) { - ib::error() << "Cannot allocate memory for the buffer pool"; - return(srv_init_abort(DB_ERROR)); } - ib::info() << "Completed initialization of buffer pool"; - -#ifdef UNIV_DEBUG - /* We have observed deadlocks with a 5MB buffer pool but - the actual lower limit could very well be a little higher. */ - - if (srv_buf_pool_size <= 5 * 1024 * 1024) { - - ib::info() << "Small buffer pool size (" - << ib::bytes_iec{srv_buf_pool_size} - << "), the flst_validate() debug function can cause a" - << " deadlock if the buffer pool fills up."; - } -#endif /* UNIV_DEBUG */ - log_sys.create(); recv_sys.create(); - lock_sys.create(srv_lock_table_size); + lock_sys.create(srv_lock_table_size = 5 * buf_pool.curr_size()); srv_startup_is_before_trx_rollback_phase = true; diff -Nru mariadb-10.11.11/storage/innobase/trx/trx0purge.cc mariadb-10.11.13/storage/innobase/trx/trx0purge.cc --- mariadb-10.11.11/storage/innobase/trx/trx0purge.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/trx/trx0purge.cc 2025-05-19 16:14:25.000000000 +0000 @@ -1052,16 +1052,25 @@ /** Close all tables that were opened in a purge batch for a worker. @param node purge task context @param thd purge coordinator thread handle */ -static void trx_purge_close_tables(purge_node_t *node, THD *thd) +static void trx_purge_close_tables(purge_node_t *node, THD *thd) noexcept { for (auto &t : node->tables) { - if (!t.second.first); - else if (t.second.first == reinterpret_cast(-1)); - else + dict_table_t *table= t.second.first; + if (table != nullptr && table != reinterpret_cast(-1)) + table->release(); + } + + MDL_context *mdl_context= static_cast(thd_mdl_context(thd)); + + for (auto &t : node->tables) + { + dict_table_t *table= t.second.first; + if (table != nullptr && table != reinterpret_cast(-1)) { - dict_table_close(t.second.first, false, thd, t.second.second); t.second.first= reinterpret_cast(-1); + if (mdl_context != nullptr && t.second.second != nullptr) + mdl_context->release_lock(t.second.second); } } } @@ -1073,36 +1082,35 @@ } __attribute__((nonnull)) -/** Aqcuire a metadata lock on a table. +/** Acquire a metadata lock on a table. 
@param table table handle @param mdl_context metadata lock acquisition context -@param mdl metadata lcok +@param mdl metadata lock @return table handle @retval nullptr if the table is not found or accessible @retval -1 if the purge of history must be suspended due to DDL */ static dict_table_t *trx_purge_table_acquire(dict_table_t *table, MDL_context *mdl_context, - MDL_ticket **mdl) + MDL_ticket **mdl) noexcept { ut_ad(dict_sys.frozen_not_locked()); *mdl= nullptr; if (!table->is_readable() || table->corrupted) - { - table->release(); return nullptr; - } size_t db_len= dict_get_db_name_len(table->name.m_name); if (db_len == 0) - return table; /* InnoDB system tables are not covered by MDL */ + { + /* InnoDB system tables are not covered by MDL */ + got_table: + table->acquire(); + return table; + } if (purge_sys.must_wait_FTS()) - { must_wait: - table->release(); return reinterpret_cast(-1); - } char db_buf[NAME_LEN + 1]; char tbl_buf[NAME_LEN + 1]; @@ -1110,7 +1118,7 @@ if (!table->parse_name(db_buf, tbl_buf, &db_len, &tbl_len)) /* The name of an intermediate table starts with #sql */ - return table; + goto got_table; { MDL_request request; @@ -1123,37 +1131,38 @@ goto must_wait; } - return table; + goto got_table; } /** Open a table handle for the purge of committed transaction history @param table_id InnoDB table identifier @param mdl_context metadata lock acquisition context -@param mdl metadata lcok +@param mdl metadata lock @return table handle @retval nullptr if the table is not found or accessible @retval -1 if the purge of history must be suspended due to DDL */ static dict_table_t *trx_purge_table_open(table_id_t table_id, MDL_context *mdl_context, - MDL_ticket **mdl) + MDL_ticket **mdl) noexcept { - dict_sys.freeze(SRW_LOCK_CALL); - - dict_table_t *table= dict_sys.find_table(table_id); + dict_table_t *table; - if (table) - table->acquire(); - else + for (;;) { + dict_sys.freeze(SRW_LOCK_CALL); + table= dict_sys.find_table(table_id); + if (table) + break; dict_sys.unfreeze(); dict_sys.lock(SRW_LOCK_CALL); table= dict_load_table_on_id(table_id, DICT_ERR_IGNORE_FK_NOKEY); - if (table) - table->acquire(); dict_sys.unlock(); if (!table) return nullptr; - dict_sys.freeze(SRW_LOCK_CALL); + /* At this point, the freshly loaded table may already have been evicted. + We must look it up again while holding a shared dict_sys.latch. We keep + trying this until the table is found in the cache or it cannot be found + in the dictionary (because the table has been dropped or rebuilt). 
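The retry loop described here is a general cache idiom: look up under a shared latch; on a miss, load from storage under an exclusive latch; then repeat the shared lookup, because the freshly loaded object is only safe to use once it has been found and pinned under the shared latch again. A compact sketch with assumed cache primitives (freeze/unfreeze for the shared latch, mirroring dict_sys, and find()/load_from_storage() as stand-ins):

  template <typename Object, typename Cache, typename Id>
  Object *find_or_load(Cache &cache, Id id)
  {
    for (;;)
    {
      cache.freeze();                     // shared latch
      if (Object *obj = cache.find(id))
        return obj;                       // caller pins, then unfreezes
      cache.unfreeze();

      cache.lock();                       // exclusive latch
      Object *loaded = cache.load_from_storage(id);
      cache.unlock();
      if (!loaded)
        return nullptr;                   // dropped or rebuilt: give up
      // otherwise retry: the object may already have been evicted again
    }
  }
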
*/ } table= trx_purge_table_acquire(table, mdl_context, mdl); @@ -1172,10 +1181,7 @@ for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr; thr= UT_LIST_GET_NEXT(thrs, thr)) - { - purge_node_t *node= static_cast(thr->child); - trx_purge_close_tables(node, thd); - } + trx_purge_close_tables(static_cast(thr->child), thd); m_active= false; wait_FTS(false); @@ -1198,7 +1204,7 @@ if (t.second.first == reinterpret_cast(-1)) { if (table) - dict_table_close(table, false, thd, *mdl); + dict_table_close(table, thd, *mdl); goto retry; } } @@ -1231,9 +1237,6 @@ static_cast(thd_mdl_context(thd)); ut_ad(mdl_context); - const size_t max_pages= - std::min(buf_pool.curr_size * 3 / 4, size_t{srv_purge_batch_size}); - while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) { /* Track the max {trx_id, undo_no} for truncating the @@ -1283,12 +1286,12 @@ ut_ad(!table_node->in_progress); } - if (purge_sys.n_pages_handled() >= max_pages) + const size_t size{purge_sys.n_pages_handled()}; + if (size >= size_t{srv_purge_batch_size} || + size >= buf_pool.usable_size() * 3 / 4) break; } - purge_sys.m_active= false; - #ifdef UNIV_DEBUG thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); for (ulint i= 0; thr && i < *n_work_items; @@ -1337,6 +1340,8 @@ TRANSACTIONAL_INLINE void purge_sys_t::batch_cleanup(const purge_sys_t::iterator &head) { + m_active= false; + /* Release the undo pages. */ for (auto p : pages) p.second->unfix(); diff -Nru mariadb-10.11.11/storage/innobase/trx/trx0rec.cc mariadb-10.11.13/storage/innobase/trx/trx0rec.cc --- mariadb-10.11.11/storage/innobase/trx/trx0rec.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/trx/trx0rec.cc 2025-05-19 16:14:25.000000000 +0000 @@ -152,7 +152,9 @@ ulint n_idx = 0; for (const auto& v_index : vcol->v_indexes) { n_idx++; - /* FIXME: index->id is 64 bits! */ + if (uint32_t hi= uint32_t(v_index.index->id >> 32)) { + size += 1 + mach_get_compressed_size(hi); + } size += mach_get_compressed_size(uint32_t(v_index.index->id)); size += mach_get_compressed_size(v_index.nth_field); } @@ -179,10 +181,14 @@ ptr += mach_write_compressed(ptr, n_idx); for (const auto& v_index : vcol->v_indexes) { - ptr += mach_write_compressed( - /* FIXME: index->id is 64 bits! */ - ptr, uint32_t(v_index.index->id)); - + /* This is compatible with + ptr += mach_u64_write_much_compressed(ptr, v_index.index->id) + (the added "if" statement is fixing an old regression). */ + if (uint32_t hi= uint32_t(v_index.index->id >> 32)) { + *ptr++ = 0xff; + ptr += mach_write_compressed(ptr, hi); + } + ptr += mach_write_compressed(ptr, uint32_t(v_index.index->id)); ptr += mach_write_compressed(ptr, v_index.nth_field); } @@ -221,7 +227,15 @@ dict_index_t* clust_index = dict_table_get_first_index(table); for (ulint i = 0; i < num_idx; i++) { - index_id_t id = mach_read_next_compressed(&ptr); + index_id_t id = 0; + /* This is like mach_u64_read_much_compressed(), + but advancing ptr to the next field. 
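The encoding restored in this trx0rec.cc hunk is two-tier: an index id that fits in 32 bits keeps the old single compressed word, while a larger id gains a 0xff prefix byte followed by the compressed high word. 0xff cannot begin a 32-bit compressed value, which makes the prefix unambiguous and keeps the format compatible with mach_u64_write_much_compressed(). A round-trip sketch using only the mach_* helpers already named in the hunk (ptr and id as in the surrounding code):

  // Write side (mirroring the hunk above):
  if (uint32_t hi = uint32_t(id >> 32)) {
    *ptr++ = 0xff;                          // marker: high word follows
    ptr += mach_write_compressed(ptr, hi);
  }
  ptr += mach_write_compressed(ptr, uint32_t(id));

  // Read side (the matching decode appears in the next hunk):
  uint64_t decoded = 0;
  if (*ptr == 0xff) {
    ptr++;
    decoded = uint64_t(mach_read_next_compressed(&ptr)) << 32;
  }
  decoded |= mach_read_next_compressed(&ptr);
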
*/ + if (*ptr == 0xff) { + ptr++; + id = mach_read_next_compressed(&ptr); + id <<= 32; + } + id |= mach_read_next_compressed(&ptr); ulint pos = mach_read_next_compressed(&ptr); dict_index_t* index = dict_table_get_next_index(clust_index); diff -Nru mariadb-10.11.11/storage/innobase/trx/trx0trx.cc mariadb-10.11.13/storage/innobase/trx/trx0trx.cc --- mariadb-10.11.11/storage/innobase/trx/trx0trx.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/trx/trx0trx.cc 2025-05-19 16:14:25.000000000 +0000 @@ -134,8 +134,6 @@ trx->will_lock = false; - trx->bulk_insert = false; - trx->apply_online_log = false; ut_d(trx->start_file = 0); @@ -452,7 +450,7 @@ /** Transition to committed state, to release implicit locks. */ TRANSACTIONAL_INLINE inline void trx_t::commit_state() { - ut_d(auto trx_state{state}); + ut_d(auto trx_state= state); ut_ad(trx_state == TRX_STATE_PREPARED || trx_state == TRX_STATE_PREPARED_RECOVERED || trx_state == TRX_STATE_ACTIVE); @@ -1513,6 +1511,7 @@ *detailed_error= '\0'; mod_tables.clear(); + bulk_insert= TRX_NO_BULK; check_foreigns= true; check_unique_secondary= true; assert_freed(); diff -Nru mariadb-10.11.11/storage/innobase/ut/ut0rnd.cc mariadb-10.11.13/storage/innobase/ut/ut0rnd.cc --- mariadb-10.11.11/storage/innobase/ut/ut0rnd.cc 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/innobase/ut/ut0rnd.cc 2025-05-19 16:14:25.000000000 +0000 @@ -48,6 +48,8 @@ ulint pow2; ulint i; + ut_ad(n); + n += 100; pow2 = 1; diff -Nru mariadb-10.11.11/storage/maria/ma_control_file.c mariadb-10.11.13/storage/maria/ma_control_file.c --- mariadb-10.11.11/storage/maria/ma_control_file.c 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/maria/ma_control_file.c 2025-05-19 16:14:25.000000000 +0000 @@ -276,7 +276,7 @@ int open_flags) { uchar buffer[CF_MAX_SIZE]; - char name[FN_REFLEN], errmsg_buff[256]; + char name[FN_REFLEN], errmsg_buff[512]; const char *errmsg, *lock_failed_errmsg= "Could not get an exclusive lock;" " file is probably in use by another process"; uint new_cf_create_time_size, new_cf_changeable_size, new_block_size; @@ -399,10 +399,14 @@ if (new_cf_create_time_size < CF_MIN_CREATE_TIME_TOTAL_SIZE || new_cf_changeable_size < CF_MIN_CHANGEABLE_TOTAL_SIZE || - new_cf_create_time_size + new_cf_changeable_size != file_size) + new_cf_create_time_size + new_cf_changeable_size > file_size) { error= CONTROL_FILE_INCONSISTENT_INFORMATION; - errmsg= "Sizes stored in control file are inconsistent"; + sprintf(errmsg_buff, + "Sizes stored in control file are inconsistent. 
" + "create_time_size: %u changeable_size: %u file_size: %llu", + new_cf_create_time_size, new_cf_changeable_size, (ulonglong) file_size); + errmsg= errmsg_buff; goto err; } @@ -622,6 +626,20 @@ return (control_file_fd >= 0); } + + +static int check_zerofill(uchar *buffer, ulonglong offset, ulonglong length) +{ + uchar *pos= buffer + offset, *end= buffer+length; + while (pos < end) + { + if (*pos++) + return 1; + } + return 0; +} + + /** Print content of aria_log_control file */ @@ -629,6 +647,7 @@ my_bool print_aria_log_control() { uchar buffer[CF_MAX_SIZE]; + char errmsg_buff[512]; char name[FN_REFLEN], uuid_str[MY_UUID_STRING_LENGTH+1]; const char *errmsg; uint new_cf_create_time_size, new_cf_changeable_size; @@ -705,10 +724,14 @@ if (new_cf_create_time_size < CF_MIN_CREATE_TIME_TOTAL_SIZE || new_cf_changeable_size < CF_MIN_CHANGEABLE_TOTAL_SIZE || - new_cf_create_time_size + new_cf_changeable_size != file_size) + new_cf_create_time_size + new_cf_changeable_size > file_size) { error= CONTROL_FILE_INCONSISTENT_INFORMATION; - errmsg= "Sizes stored in control file are inconsistent"; + sprintf(errmsg_buff, + "Sizes stored in control file are inconsistent. " + "create_time_size: %u changeable_size: %u file_size: %llu", + new_cf_create_time_size, new_cf_changeable_size, (ulonglong) file_size); + errmsg= errmsg_buff; goto err; } checkpoint_lsn= lsn_korr(buffer + new_cf_create_time_size + @@ -732,6 +755,18 @@ (buffer + new_cf_create_time_size + CF_RECOV_FAIL_OFFSET)[0]; printf("recovery_failures: %u\n", recovery_fails); } + if (check_zerofill(buffer, new_cf_create_time_size + new_cf_changeable_size, file_size)) + { + printf("Warning: %s file_size is %llu (should be %llu) and contains unknown data.\n" + "It will still work but should be examined.\n", + name, (ulonglong) file_size, + (ulonglong) (new_cf_create_time_size + new_cf_changeable_size)); + } + else if (new_cf_create_time_size + new_cf_changeable_size < file_size) + printf("Note: file_size (%llu) is bigger than the expected file size %llu.\n" + "This is unexpected but will not cause any issues.\n", + (ulonglong) file_size, + (ulonglong) (new_cf_create_time_size + new_cf_changeable_size)); mysql_file_close(file, MYF(0)); DBUG_RETURN(0); diff -Nru mariadb-10.11.11/storage/maria/ma_pagecache.c mariadb-10.11.13/storage/maria/ma_pagecache.c --- mariadb-10.11.11/storage/maria/ma_pagecache.c 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/maria/ma_pagecache.c 2025-05-19 16:14:25.000000000 +0000 @@ -4726,10 +4726,10 @@ static int cmp_sec_link(const void *a_, const void *b_) { - PAGECACHE_BLOCK_LINK *const *a= a_; - PAGECACHE_BLOCK_LINK *const *b= b_; - return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 : - ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0); + const PAGECACHE_BLOCK_LINK *a= *(const PAGECACHE_BLOCK_LINK **) a_; + const PAGECACHE_BLOCK_LINK *b= *(const PAGECACHE_BLOCK_LINK **) b_; + return ((a->hash_link->pageno < b->hash_link->pageno) ? -1 : + (a->hash_link->pageno > b->hash_link->pageno) ? 
1 : 0); } diff -Nru mariadb-10.11.11/storage/maria/ma_unique.c mariadb-10.11.13/storage/maria/ma_unique.c --- mariadb-10.11.11/storage/maria/ma_unique.c 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/maria/ma_unique.c 2025-05-19 16:14:25.000000000 +0000 @@ -139,6 +139,8 @@ { uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); memcpy((void*) &pos,pos+keyseg->bit_start,sizeof(char*)); + if (!pos) + pos= (const uchar*) ""; /* hash_sort does not support NULL ptr */ if (!length || length > tmp_length) length=tmp_length; /* The whole blob */ } @@ -236,6 +238,10 @@ } memcpy((void*) &pos_a, pos_a+keyseg->bit_start, sizeof(char*)); memcpy((void*) &pos_b, pos_b+keyseg->bit_start, sizeof(char*)); + if (pos_a == 0) + pos_a= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */ + if (pos_b == 0) + pos_b= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */ } if (type == HA_KEYTYPE_TEXT/* the CHAR data type*/) { diff -Nru mariadb-10.11.11/storage/mroonga/CMakeLists.txt mariadb-10.11.13/storage/mroonga/CMakeLists.txt --- mariadb-10.11.11/storage/mroonga/CMakeLists.txt 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/mroonga/CMakeLists.txt 2025-05-19 16:14:25.000000000 +0000 @@ -17,7 +17,7 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 2.8...3.12) project(mroonga) if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") diff -Nru mariadb-10.11.11/storage/mroonga/ha_mroonga.cpp mariadb-10.11.13/storage/mroonga/ha_mroonga.cpp --- mariadb-10.11.11/storage/mroonga/ha_mroonga.cpp 2025-01-30 11:01:24.000000000 +0000 +++ mariadb-10.11.13/storage/mroonga/ha_mroonga.cpp 2025-05-19 16:14:25.000000000 +0000 @@ -558,6 +558,9 @@ case HA_EXTRA_END_ALTER_COPY: inspected = "HA_EXTRA_END_ALTER_COPY"; break; + case HA_EXTRA_ABORT_ALTER_COPY: + inspected = "HA_EXTRA_ABORT_ALTER_COPY"; + break; #ifdef MRN_HAVE_HA_EXTRA_EXPORT case HA_EXTRA_EXPORT: inspected = "HA_EXTRA_EXPORT"; @@ -593,6 +596,11 @@ inspected = "HA_EXTRA_END_ALTER_COPY"; break; #endif +#ifdef MRN_HAVE_HA_EXTRA_ABORT_ALTER_COPY + case HA_EXTRA_ABORT_ALTER_COPY: + inspected = "HA_EXTRA_ABORT_ALTER_COPY"; + break; +#endif #ifdef MRN_HAVE_HA_EXTRA_NO_AUTOINC_LOCKING case HA_EXTRA_NO_AUTOINC_LOCKING: inspected = "HA_EXTRA_NO_AUTOINC_LOCKING"; diff -Nru mariadb-10.11.11/storage/mroonga/vendor/groonga/CMakeLists.txt mariadb-10.11.13/storage/mroonga/vendor/groonga/CMakeLists.txt --- mariadb-10.11.11/storage/mroonga/vendor/groonga/CMakeLists.txt 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/mroonga/vendor/groonga/CMakeLists.txt 2025-05-19 16:14:26.000000000 +0000 @@ -15,7 +15,7 @@ # https://buildbot.askmonty.org/buildbot/builders/work-amd64-valgrind/builds/5263/steps/compile/logs/stdio # says CMake 2.6.2... We want to drop old software support... 
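The blob-pointer guards added to ma_unique.c above (and to mi_unique.c later in this diff) address a UBSAN finding: a zero-length blob may be stored with a NULL data pointer, and applying an offset to a null pointer is undefined behaviour even before anything is dereferenced. A minimal reproduction of the hazard and the fix (the hash_sort call is representative of the charset functions the comments refer to):

  #include <cstddef>

  const unsigned char *pos = nullptr;  // zero-length blob: NULL data pointer
  size_t length = 0;

  // Undefined: null-pointer arithmetic such as pos + bit_start is what
  // UBSAN reports as "nullptr with offset", regardless of length.
  // const unsigned char *end = pos + length;

  // The fix: substitute a valid empty string before any arithmetic,
  // since hash_sort() and the compare functions do not accept NULL.
  if (!pos)
    pos = reinterpret_cast<const unsigned char *>("");
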
-cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 2.8...3.12) # cmake_minimum_required(VERSION 2.6.4) # CentOS 5 set(GRN_PROJECT_NAME "groonga") set(GRN_PROJECT_LABEL "Groonga") diff -Nru mariadb-10.11.11/storage/mroonga/vendor/groonga/lib/db.c mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/db.c --- mariadb-10.11.11/storage/mroonga/vendor/groonga/lib/db.c 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/db.c 2025-05-19 16:14:26.000000000 +0000 @@ -969,8 +969,8 @@ *subrec_size = range_size + sizeof(uint32_t) + sizeof(uint32_t); break; } - *value_size = (uintptr_t)GRN_RSET_SUBRECS_NTH((((grn_rset_recinfo *)0)->subrecs), - *subrec_size, max_n_subrecs); + *value_size = (uintptr_t) GRN_RSET_SUBRECS_NTH(offsetof(grn_rset_recinfo, subrecs), + *subrec_size, max_n_subrecs); } else { *value_size = range_size; } diff -Nru mariadb-10.11.11/storage/mroonga/vendor/groonga/lib/hash.c mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/hash.c --- mariadb-10.11.11/storage/mroonga/vendor/groonga/lib/hash.c 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/hash.c 2025-05-19 16:14:26.000000000 +0000 @@ -1727,15 +1727,15 @@ { if (flags & GRN_OBJ_KEY_VAR_SIZE) { if (flags & GRN_OBJ_KEY_LARGE) { - return (uintptr_t)((grn_io_hash_entry_large *)0)->value + value_size; + return offsetof(grn_io_hash_entry_large, value) + value_size; } else { - return (uintptr_t)((grn_io_hash_entry_normal *)0)->value + value_size; + return offsetof(grn_io_hash_entry_normal, value) + value_size; } } else { if (key_size == sizeof(uint32_t)) { - return (uintptr_t)((grn_plain_hash_entry *)0)->value + value_size; + return offsetof(grn_plain_hash_entry, value) + value_size; } else { - return (uintptr_t)((grn_rich_hash_entry *)0)->key_and_value + return offsetof(grn_rich_hash_entry, key_and_value) + key_size + value_size; } } @@ -1865,12 +1865,12 @@ { uint32_t entry_size; if (flags & GRN_OBJ_KEY_VAR_SIZE) { - entry_size = (uintptr_t)((grn_tiny_hash_entry *)0)->value + value_size; + entry_size = offsetof(grn_tiny_hash_entry, value) + value_size; } else { if (key_size == sizeof(uint32_t)) { - entry_size = (uintptr_t)((grn_plain_hash_entry *)0)->value + value_size; + entry_size = offsetof(grn_plain_hash_entry, value) + value_size; } else { - entry_size = (uintptr_t)((grn_rich_hash_entry *)0)->key_and_value + entry_size = offsetof(grn_rich_hash_entry, key_and_value) + key_size + value_size; } } diff -Nru mariadb-10.11.11/storage/mroonga/vendor/groonga/lib/ii.c mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/ii.c --- mariadb-10.11.11/storage/mroonga/vendor/groonga/lib/ii.c 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/mroonga/vendor/groonga/lib/ii.c 2025-05-19 16:14:26.000000000 +0000 @@ -2049,7 +2049,7 @@ if ((df & 1)) { df >>= 1; size = nreq == dvlen ? 
data_size : df * nreq; - if (dv[dvlen].data < dv[0].data + size) { + if (!dv[0].data || dv[dvlen].data < dv[0].data + size) { if (dv[0].data) { GRN_FREE(dv[0].data); } if (!(rp = GRN_MALLOC(size * sizeof(uint32_t)))) { return 0; } dv[dvlen].data = rp + size; @@ -10653,7 +10653,7 @@ } #define GRN_II_BUILDER_TERM_INPLACE_SIZE\ - (sizeof(grn_ii_builder_term) - (uintptr_t)&((grn_ii_builder_term *)0)->dummy) + (sizeof(grn_ii_builder_term) - offsetof(grn_ii_builder_term, dummy)) typedef struct { grn_id rid; /* Last record ID */ diff -Nru mariadb-10.11.11/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt mariadb-10.11.13/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt --- mariadb-10.11.11/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/mroonga/vendor/groonga/vendor/plugins/groonga-normalizer-mysql/CMakeLists.txt 2025-05-19 16:14:26.000000000 +0000 @@ -15,7 +15,7 @@ # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1335 USA -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 2.8...3.12) if(NOT DEFINED GROONGA_NORMALIZER_MYSQL_PROJECT_NAME) set(GROONGA_NORMALIZER_MYSQL_PROJECT_NAME "groonga-normalizer-mysql") endif() diff -Nru mariadb-10.11.11/storage/myisam/mi_unique.c mariadb-10.11.13/storage/myisam/mi_unique.c --- mariadb-10.11.11/storage/myisam/mi_unique.c 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/myisam/mi_unique.c 2025-05-19 16:14:26.000000000 +0000 @@ -115,6 +115,8 @@ { uint tmp_length=_mi_calc_blob_length(keyseg->bit_start,pos); memcpy((char**) &pos, pos+keyseg->bit_start, sizeof(char*)); + if (!pos) + pos= (const uchar*) ""; /* hash_sort does not support NULL ptr */ if (!length || length > tmp_length) length=tmp_length; /* The whole blob */ } @@ -211,6 +213,10 @@ } memcpy((char**) &pos_a, pos_a+keyseg->bit_start, sizeof(char*)); memcpy((char**) &pos_b, pos_b+keyseg->bit_start, sizeof(char*)); + if (pos_a == 0) + pos_a= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */ + if (pos_b == 0) + pos_b= (const uchar *) ""; /* Avoid UBSAN nullptr-with-offset */ } if (type == HA_KEYTYPE_TEXT/*The CHAR data type*/) { diff -Nru mariadb-10.11.11/storage/rocksdb/build_rocksdb.cmake mariadb-10.11.13/storage/rocksdb/build_rocksdb.cmake --- mariadb-10.11.11/storage/rocksdb/build_rocksdb.cmake 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/build_rocksdb.cmake 2025-05-19 16:14:26.000000000 +0000 @@ -176,35 +176,53 @@ # - *_test.cc # - *_bench.cc set(ROCKSDB_SOURCES + cache/cache.cc + cache/cache_entry_roles.cc + cache/cache_key.cc + cache/cache_reservation_manager.cc cache/clock_cache.cc cache/lru_cache.cc cache/sharded_cache.cc db/arena_wrapped_db_iter.cc + db/blob/blob_fetcher.cc + db/blob/blob_file_addition.cc + db/blob/blob_file_builder.cc + db/blob/blob_file_builder.cc + db/blob/blob_file_cache.cc + db/blob/blob_file_garbage.cc + db/blob/blob_file_meta.cc + db/blob/blob_file_reader.cc + db/blob/blob_garbage_meter.cc + db/blob/blob_log_format.cc + db/blob/blob_log_sequential_reader.cc + db/blob/blob_log_writer.cc + db/blob/prefetch_buffer_collection.cc db/builder.cc db/c.cc db/column_family.cc - db/compacted_db_impl.cc db/compaction/compaction.cc db/compaction/compaction_iterator.cc - db/compaction/compaction_picker.cc db/compaction/compaction_job.cc + db/compaction/compaction_picker.cc db/compaction/compaction_picker_fifo.cc 
db/compaction/compaction_picker_level.cc db/compaction/compaction_picker_universal.cc + db/compaction/sst_partitioner.cc db/convenience.cc db/db_filesnapshot.cc + db/dbformat.cc + db/db_impl/compacted_db_impl.cc db/db_impl/db_impl.cc - db/db_impl/db_impl_write.cc db/db_impl/db_impl_compaction_flush.cc - db/db_impl/db_impl_files.cc - db/db_impl/db_impl_open.cc db/db_impl/db_impl_debug.cc db/db_impl/db_impl_experimental.cc + db/db_impl/db_impl_files.cc + db/db_impl/db_impl_open.cc db/db_impl/db_impl_readonly.cc db/db_impl/db_impl_secondary.cc + db/db_impl/db_impl_write.cc db/db_info_dumper.cc db/db_iter.cc - db/dbformat.cc db/error_handler.cc db/event_helpers.cc db/experimental.cc @@ -215,14 +233,16 @@ db/forward_iterator.cc db/import_column_family_job.cc db/internal_stats.cc - db/logs_with_prep_tracker.cc db/log_reader.cc + db/logs_with_prep_tracker.cc db/log_writer.cc db/malloc_stats.cc db/memtable.cc db/memtable_list.cc db/merge_helper.cc db/merge_operator.cc + db/output_validator.cc + db/periodic_work_scheduler.cc db/range_del_aggregator.cc db/range_tombstone_fragmenter.cc db/repair.cc @@ -233,25 +253,32 @@ db/trim_history_scheduler.cc db/version_builder.cc db/version_edit.cc + db/version_edit_handler.cc db/version_set.cc + db/wal_edit.cc db/wal_manager.cc - db/write_batch.cc db/write_batch_base.cc + db/write_batch.cc db/write_controller.cc db/write_thread.cc + env/composite_env.cc env/env.cc env/env_chroot.cc env/env_encryption.cc env/env_hdfs.cc env/file_system.cc + env/file_system_tracer.cc + env/fs_remap.cc env/mock_env.cc + env/unique_id_gen.cc file/delete_scheduler.cc + file/filename.cc file/file_prefetch_buffer.cc file/file_util.cc - file/filename.cc + file/line_file_reader.cc file/random_access_file_reader.cc - file/read_write_util.cc file/readahead_raf.cc + file/read_write_util.cc file/sequence_file_reader.cc file/sst_file_manager_impl.cc file/writable_file_writer.cc @@ -281,29 +308,38 @@ monitoring/thread_status_util.cc monitoring/thread_status_util_debug.cc options/cf_options.cc + options/configurable.cc + options/customizable.cc options/db_options.cc options/options.cc options/options_helper.cc options/options_parser.cc - options/options_sanity_check.cc port/stack_trace.cc table/adaptive/adaptive_table_factory.cc - table/block_based/block.cc + table/block_based/binary_search_index_reader.cc table/block_based/block_based_filter_block.cc table/block_based/block_based_table_builder.cc table/block_based/block_based_table_factory.cc + table/block_based/block_based_table_iterator.cc table/block_based/block_based_table_reader.cc table/block_based/block_builder.cc + table/block_based/block.cc + table/block_based/block_prefetcher.cc table/block_based/block_prefix_index.cc - table/block_based/data_block_hash_index.cc table/block_based/data_block_footer.cc + table/block_based/data_block_hash_index.cc table/block_based/filter_block_reader_common.cc table/block_based/filter_policy.cc table/block_based/flush_block_policy.cc table/block_based/full_filter_block.cc + table/block_based/hash_index_reader.cc table/block_based/index_builder.cc + table/block_based/index_reader_common.cc table/block_based/parsed_full_filter_block.cc table/block_based/partitioned_filter_block.cc + table/block_based/partitioned_index_iterator.cc + table/block_based/partitioned_index_reader.cc + table/block_based/reader_common.cc table/block_based/uncompression_dict_reader.cc table/block_fetcher.cc table/cuckoo/cuckoo_table_builder.cc @@ -321,10 +357,13 @@ table/plain/plain_table_index.cc 
table/plain/plain_table_key_coding.cc table/plain/plain_table_reader.cc + table/sst_file_dumper.cc table/sst_file_reader.cc table/sst_file_writer.cc + table/table_factory.cc table/table_properties.cc table/two_level_iterator.cc + table/unique_id.cc test_util/sync_point.cc test_util/sync_point_impl.cc test_util/testutil.cc @@ -335,8 +374,12 @@ tools/ldb_tool.cc tools/sst_dump_tool.cc tools/trace_analyzer_tool.cc - trace_replay/trace_replay.cc trace_replay/block_cache_tracer.cc + trace_replay/io_tracer.cc + trace_replay/trace_record.cc + trace_replay/trace_record_handler.cc + trace_replay/trace_record_result.cc + trace_replay/trace_replay.cc util/coding.cc util/compaction_job_stats_impl.cc util/comparator.cc @@ -344,17 +387,8 @@ util/concurrent_task_limiter_impl.cc util/crc32c.cc util/dynamic_bloom.cc - util/hash.cc - util/murmurhash.cc - util/random.cc - util/rate_limiter.cc - util/slice.cc util/file_checksum_helper.cc - util/status.cc - util/string_util.cc - util/thread_local.cc - util/threadpool_imp.cc - util/xxhash.cc + util/hash.cc utilities/backupable/backupable_db.cc utilities/blob_db/blob_compaction_filter.cc utilities/blob_db/blob_db.cc @@ -362,10 +396,8 @@ utilities/blob_db/blob_db_impl_filesnapshot.cc utilities/blob_db/blob_dump_tool.cc utilities/blob_db/blob_file.cc - utilities/blob_db/blob_log_reader.cc - utilities/blob_db/blob_log_writer.cc - utilities/blob_db/blob_log_format.cc utilities/checkpoint/checkpoint_impl.cc + utilities/compaction_filters.cc utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc utilities/debug.cc utilities/env_mirror.cc @@ -373,11 +405,12 @@ utilities/leveldb_options/leveldb_options.cc utilities/memory/memory_util.cc utilities/merge_operators/bytesxor.cc + utilities/merge_operators.cc utilities/merge_operators/max.cc utilities/merge_operators/put.cc utilities/merge_operators/sortlist.cc - utilities/merge_operators/string_append/stringappend.cc utilities/merge_operators/string_append/stringappend2.cc + utilities/merge_operators/string_append/stringappend.cc utilities/merge_operators/uint64add.cc utilities/object_registry.cc utilities/option_change_migration/option_change_migration.cc @@ -391,22 +424,37 @@ utilities/simulator_cache/sim_cache.cc utilities/table_properties_collectors/compact_on_deletion_collector.cc utilities/trace/file_trace_reader_writer.cc - utilities/transactions/optimistic_transaction_db_impl.cc + utilities/trace/replayer_impl.cc + utilities/transactions/lock/lock_manager.cc + utilities/transactions/lock/point/point_lock_manager.cc + utilities/transactions/lock/point/point_lock_tracker.cc utilities/transactions/optimistic_transaction.cc + utilities/transactions/optimistic_transaction_db_impl.cc utilities/transactions/pessimistic_transaction.cc utilities/transactions/pessimistic_transaction_db.cc utilities/transactions/snapshot_checker.cc utilities/transactions/transaction_base.cc utilities/transactions/transaction_db_mutex_impl.cc - utilities/transactions/transaction_lock_mgr.cc utilities/transactions/transaction_util.cc utilities/transactions/write_prepared_txn.cc utilities/transactions/write_prepared_txn_db.cc utilities/transactions/write_unprepared_txn.cc utilities/transactions/write_unprepared_txn_db.cc utilities/ttl/db_ttl_impl.cc + utilities/wal_filter.cc utilities/write_batch_with_index/write_batch_with_index.cc utilities/write_batch_with_index/write_batch_with_index_internal.cc + util/murmurhash.cc + util/random.cc + util/rate_limiter.cc + util/regex.cc + util/ribbon_config.cc + util/slice.cc + util/status.cc + 
util/string_util.cc + util/thread_local.cc + util/threadpool_imp.cc + util/xxhash.cc ) @@ -484,8 +532,10 @@ STRING(TIMESTAMP GIT_DATE_TIME "%Y-%m-%d %H:%M:%S") ENDIF() +# psergey-added: +SET(GIT_MOD 0) CONFIGURE_FILE(${ROCKSDB_SOURCE_DIR}/util/build_version.cc.in build_version.cc @ONLY) -INCLUDE_DIRECTORIES(${ROCKSDB_SOURCE_DIR}/util) + list(APPEND SOURCES ${CMAKE_CURRENT_BINARY_DIR}/build_version.cc) ADD_CONVENIENCE_LIBRARY(rocksdblib ${SOURCES}) diff -Nru mariadb-10.11.11/storage/rocksdb/ha_rocksdb.cc mariadb-10.11.13/storage/rocksdb/ha_rocksdb.cc --- mariadb-10.11.11/storage/rocksdb/ha_rocksdb.cc 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/ha_rocksdb.cc 2025-05-19 16:14:26.000000000 +0000 @@ -1250,7 +1250,7 @@ "Statistics Level for RocksDB. Default is 0 (kExceptHistogramOrTimers)", nullptr, rocksdb_set_rocksdb_stats_level, /* default */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, - /* min */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, + /* min */ (uint)rocksdb::StatsLevel::kDisableAll, /* max */ (uint)rocksdb::StatsLevel::kAll, 0); static MYSQL_SYSVAR_SIZE_T(compaction_readahead_size, @@ -1596,7 +1596,7 @@ "BlockBasedTableOptions::no_block_cache for RocksDB", nullptr, nullptr, rocksdb_tbl_options->no_block_cache); -static MYSQL_SYSVAR_SIZE_T(block_size, rocksdb_tbl_options->block_size, +static MYSQL_SYSVAR_UINT64_T(block_size, rocksdb_tbl_options->block_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "BlockBasedTableOptions::block_size for RocksDB", nullptr, nullptr, rocksdb_tbl_options->block_size, @@ -3992,7 +3992,7 @@ DBUG_ASSERT(xid != nullptr); DBUG_ASSERT(commit_latency_stats != nullptr); - rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true); + rocksdb::StopWatchNano timer(rocksdb::SystemClock::Default().get(), true); const auto name = rdb_xid_to_string(*xid); DBUG_ASSERT(!name.empty()); @@ -4187,7 +4187,7 @@ DBUG_ASSERT(thd != nullptr); DBUG_ASSERT(commit_latency_stats != nullptr); - rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true); + rocksdb::StopWatchNano timer(rocksdb::SystemClock::Default().get(), true); /* note: h->external_lock(F_UNLCK) is called after this function is called) */ Rdb_transaction *tx = get_tx_from_thd(thd); @@ -4732,8 +4732,7 @@ if (tf_name.find("BlockBasedTable") != std::string::npos) { const rocksdb::BlockBasedTableOptions *const bbt_opt = - reinterpret_cast( - table_factory->GetOptions()); + table_factory->GetOptions(); if (bbt_opt != nullptr) { if (bbt_opt->block_cache.get() != nullptr) { diff -Nru mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result --- mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result 2025-05-19 16:14:26.000000000 +0000 @@ -20,7 +20,7 @@ set rocksdb_verify_row_debug_checksums=1; set session debug_dbug= "+d,myrocks_simulate_bad_row_read1"; select * from t1 where pk=1; -ERROR HY000: Got error 205 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 206 'Found data corruption.' 
from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_row_read1"; set rocksdb_verify_row_debug_checksums=@tmp1; select * from t1 where pk=1; @@ -28,11 +28,11 @@ 1 1 set session debug_dbug= "+d,myrocks_simulate_bad_row_read2"; select * from t1 where pk=1; -ERROR HY000: Got error 205 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 206 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_row_read2"; set session debug_dbug= "+d,myrocks_simulate_bad_row_read3"; select * from t1 where pk=1; -ERROR HY000: Got error 205 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 206 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_row_read3"; insert into t1 values(4,'0123456789'); select * from t1; @@ -56,7 +56,7 @@ ABCD 1 set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1"; select * from t2; -ERROR HY000: Got error 205 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 206 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1"; drop table t2; create table t2 ( @@ -69,6 +69,6 @@ ABCD 1 set session debug_dbug= "+d,myrocks_simulate_bad_pk_read1"; select * from t2; -ERROR HY000: Got error 205 'Found data corruption.' from ROCKSDB +ERROR HY000: Got error 206 'Found data corruption.' from ROCKSDB set session debug_dbug= "-d,myrocks_simulate_bad_pk_read1"; drop table t2; diff -Nru mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result --- mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result 2025-05-19 16:14:26.000000000 +0000 @@ -1,12 +1,6 @@ -call mtr.add_suppression("Column family 'cf1' not found"); -call mtr.add_suppression("Column family 'rev:cf2' not found"); DROP TABLE IF EXISTS t1; call mtr.add_suppression("Column family 'cf1' not found"); call mtr.add_suppression("Column family 'rev:cf2' not found"); -set global rocksdb_compact_cf = 'cf1'; -set global rocksdb_compact_cf = 'rev:cf2'; -set global rocksdb_signal_drop_index_thread = 1; -# restart CREATE TABLE t1 ( a int not null, b int not null, @@ -15,6 +9,10 @@ key (b) comment 'rev:cf2' ) ENGINE=RocksDB; DELETE FROM t1; +set global rocksdb_compact_cf = 'cf1'; +set global rocksdb_compact_cf = 'rev:cf2'; +set global rocksdb_signal_drop_index_thread = 1; +# restart select variable_value into @a from information_schema.global_status where variable_name='rocksdb_compact_read_bytes'; drop table t1; select case when variable_value-@a < 500000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_compact_read_bytes'; diff -Nru mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result --- mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result 2025-05-19 16:14:26.000000000 +0000 @@ -982,7 +982,7 @@ rocksdb_skip_unique_check_tables .* rocksdb_sst_mgr_rate_bytes_per_sec 0 rocksdb_stats_dump_period_sec 600 -rocksdb_stats_level 0 +rocksdb_stats_level 1 rocksdb_stats_recalc_rate 0 rocksdb_store_row_debug_checksums OFF rocksdb_strict_collation_check OFF diff -Nru mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result 
mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result --- mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result 2025-05-19 16:14:26.000000000 +0000 @@ -1,2 +1,2 @@ Check for MANIFEST files -MANIFEST-000006 +MANIFEST-000004 diff -Nru mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result --- mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result 2025-05-19 16:14:26.000000000 +0000 @@ -1,12 +1,6 @@ -call mtr.add_suppression("Column family 'cf1' not found"); -call mtr.add_suppression("Column family 'rev:cf2' not found"); DROP TABLE IF EXISTS t1; call mtr.add_suppression("Column family 'cf1' not found"); call mtr.add_suppression("Column family 'rev:cf2' not found"); -set global rocksdb_compact_cf = 'cf1'; -set global rocksdb_compact_cf = 'rev:cf2'; -set global rocksdb_signal_drop_index_thread = 1; -# restart CREATE TABLE t1 ( a int not null, b int not null, @@ -15,6 +9,10 @@ key (b) comment 'rev:cf2' ) ENGINE=RocksDB; DELETE FROM t1; +set global rocksdb_compact_cf = 'cf1'; +set global rocksdb_compact_cf = 'rev:cf2'; +set global rocksdb_signal_drop_index_thread = 1; +# restart select variable_value into @a from information_schema.global_status where variable_name='rocksdb_compact_read_bytes'; truncate table t1; select case when variable_value-@a < 500000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_compact_read_bytes'; diff -Nru mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc --- mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc 2025-05-19 16:14:26.000000000 +0000 @@ -1,8 +1,5 @@ --source include/have_rocksdb.inc -call mtr.add_suppression("Column family 'cf1' not found"); -call mtr.add_suppression("Column family 'rev:cf2' not found"); - --disable_warnings DROP TABLE IF EXISTS t1; --enable_warnings @@ -10,11 +7,6 @@ call mtr.add_suppression("Column family 'cf1' not found"); call mtr.add_suppression("Column family 'rev:cf2' not found"); -# Start from clean slate -set global rocksdb_compact_cf = 'cf1'; -set global rocksdb_compact_cf = 'rev:cf2'; -set global rocksdb_signal_drop_index_thread = 1; ---source include/restart_mysqld.inc CREATE TABLE t1 ( a int not null, @@ -29,6 +21,12 @@ let $table = t1; --source drop_table3_repopulate_table.inc +# Start from clean slate +set global rocksdb_compact_cf = 'cf1'; +set global rocksdb_compact_cf = 'rev:cf2'; +set global rocksdb_signal_drop_index_thread = 1; +--source include/restart_mysqld.inc + --disable_cursor_protocol select variable_value into @a from information_schema.global_status where variable_name='rocksdb_compact_read_bytes'; --enable_cursor_protocol @@ -49,6 +47,7 @@ --source include/wait_condition.inc select case when variable_value-@a < 500000 then 'true' else 'false' end from information_schema.global_status where variable_name='rocksdb_compact_read_bytes'; +#select variable_value-@a from information_schema.global_status where variable_name='rocksdb_compact_read_bytes'; # Cleanup 
DROP TABLE IF EXISTS t1; diff -Nru mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result --- mariadb-10.11.11/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result 2025-05-19 16:14:26.000000000 +0000 @@ -11,7 +11,7 @@ SET @start_global_value = @@global.ROCKSDB_STATS_LEVEL; SELECT @start_global_value; @start_global_value -0 +1 '# Setting to valid values in global scope#' "Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 0" SET @@global.ROCKSDB_STATS_LEVEL = 0; @@ -22,7 +22,7 @@ SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT; SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 "Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 4" SET @@global.ROCKSDB_STATS_LEVEL = 4; SELECT @@global.ROCKSDB_STATS_LEVEL; @@ -32,7 +32,7 @@ SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT; SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 "Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 2" SET @@global.ROCKSDB_STATS_LEVEL = 2; SELECT @@global.ROCKSDB_STATS_LEVEL; @@ -42,7 +42,7 @@ SET @@global.ROCKSDB_STATS_LEVEL = DEFAULT; SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 "Trying to set variable @@session.ROCKSDB_STATS_LEVEL to 444. It should fail because it is not session." SET @@session.ROCKSDB_STATS_LEVEL = 444; ERROR HY000: Variable 'rocksdb_stats_level' is a GLOBAL variable and should be set with SET GLOBAL @@ -52,34 +52,34 @@ Got one of the listed errors SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 "Trying to set variable @@global.ROCKSDB_STATS_LEVEL to 'bbb'" SET @@global.ROCKSDB_STATS_LEVEL = 'bbb'; Got one of the listed errors SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 "Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '-1'" SET @@global.ROCKSDB_STATS_LEVEL = '-1'; Got one of the listed errors SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 "Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '101'" SET @@global.ROCKSDB_STATS_LEVEL = '101'; Got one of the listed errors SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 "Trying to set variable @@global.ROCKSDB_STATS_LEVEL to '484436'" SET @@global.ROCKSDB_STATS_LEVEL = '484436'; Got one of the listed errors SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 SET @@global.ROCKSDB_STATS_LEVEL = @start_global_value; SELECT @@global.ROCKSDB_STATS_LEVEL; @@global.ROCKSDB_STATS_LEVEL -0 +1 DROP TABLE valid_values; DROP TABLE invalid_values; diff -Nru mariadb-10.11.11/storage/rocksdb/rdb_i_s.cc mariadb-10.11.13/storage/rocksdb/rdb_i_s.cc --- mariadb-10.11.11/storage/rocksdb/rdb_i_s.cc 2025-01-30 11:01:25.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rdb_i_s.cc 2025-05-19 16:14:26.000000000 +0000 @@ -587,8 +587,7 @@ cf_option_types.push_back( {"PREFIX_EXTRACTOR", opts.prefix_extractor == nullptr ? 
"NULL" - : std::string(opts.prefix_extractor->Name())}); - + : std::string(opts.prefix_extractor->AsString())}); // get COMPACTION_STYLE option switch (opts.compaction_style) { case rocksdb::kCompactionStyleLevel: @@ -646,7 +645,7 @@ // get table related options std::vector table_options = - split_into_vector(opts.table_factory->GetPrintableTableOptions(), '\n'); + split_into_vector(opts.table_factory->GetPrintableOptions(), '\n'); for (auto option : table_options) { option.erase(std::remove(option.begin(), option.end(), ' '), diff -Nru mariadb-10.11.11/storage/rocksdb/rdb_source_revision.h mariadb-10.11.13/storage/rocksdb/rdb_source_revision.h --- mariadb-10.11.11/storage/rocksdb/rdb_source_revision.h 2025-01-30 11:01:27.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rdb_source_revision.h 2025-05-19 16:14:28.000000000 +0000 @@ -1 +1 @@ -#define ROCKSDB_GIT_HASH "bba5e7bc21093d7cfa765e1280a7c4fdcd284288" +#define ROCKSDB_GIT_HASH "79f08d7ffa6d34d9ca3357777bcb335884a56cfb" diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/cat_ignore_eagain mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/cat_ignore_eagain --- mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/cat_ignore_eagain 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/cat_ignore_eagain 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,54 @@ +#! /bin/bash + +# Work around issue with parallel make output causing random error, as in +# make[1]: write error: stdout +# Probably due to a kernel bug: +# https://bugs.launchpad.net/ubuntu/+source/linux-signed/+bug/1814393 +# Seems to affect image ubuntu-1604:201903-01 and ubuntu-1604:202004-01 + +cd "$(dirname $0)" + +if [ ! -x cat_ignore_eagain.out ]; then + cc -x c -o cat_ignore_eagain.out - << EOF +#include +#include +#include +int main() { + int n, m, p; + char buf[1024]; + for (;;) { + n = read(STDIN_FILENO, buf, 1024); + if (n > 0 && n <= 1024) { + for (m = 0; m < n;) { + p = write(STDOUT_FILENO, buf + m, n - m); + if (p < 0) { + if (errno == EAGAIN) { + // ignore but pause a bit + usleep(100); + } else { + perror("write failed"); + return 42; + } + } else { + m += p; + } + } + } else if (n < 0) { + if (errno == EAGAIN) { + // ignore but pause a bit + usleep(100); + } else { + // Some non-ignorable error + perror("read failed"); + return 43; + } + } else { + // EOF + return 0; + } + } +} +EOF +fi + +exec ./cat_ignore_eagain.out diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/config.yml mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/config.yml --- mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/config.yml 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/config.yml 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,872 @@ +version: 2.1 + +orbs: + win: circleci/windows@2.4.0 + slack: circleci/slack@3.4.2 + +aliases: + - ¬ify-on-main-failure + fail_only: true + only_for_branches: main + +commands: + install-cmake-on-macos: + steps: + - run: + name: Install cmake on macos + command: | + HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake + + install-jdk8-on-macos: + steps: + - run: + name: Install JDK 8 on macos + command: | + brew install --cask adoptopenjdk/openjdk/adoptopenjdk8 + + increase-max-open-files-on-macos: + steps: + - run: + name: Increase max open files + command: | + sudo sysctl -w kern.maxfiles=1048576 + sudo sysctl -w kern.maxfilesperproc=1048576 + sudo launchctl limit maxfiles 1048576 + + pre-steps: + steps: + - checkout + - run: + name: Setup Environment Variables + 
command: | + echo "export GTEST_THROW_ON_FAILURE=0" >> $BASH_ENV + echo "export GTEST_OUTPUT=\"xml:/tmp/test-results/\"" >> $BASH_ENV + echo "export SKIP_FORMAT_BUCK_CHECKS=1" >> $BASH_ENV + echo "export GTEST_COLOR=1" >> $BASH_ENV + echo "export CTEST_OUTPUT_ON_FAILURE=1" >> $BASH_ENV + echo "export CTEST_TEST_TIMEOUT=300" >> $BASH_ENV + echo "export ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> $BASH_ENV + echo "export BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> $BASH_ENV + echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV + echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV + echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV + + pre-steps-macos: + steps: + - pre-steps + + post-steps: + steps: + - slack/status: *notify-on-main-failure + - store_test_results: # store test result if there's any + path: /tmp/test-results + - store_artifacts: # store LOG for debugging if there's any + path: LOG + - run: # on fail, compress Test Logs for diagnosing the issue + name: Compress Test Logs + command: tar -cvzf t.tar.gz t + when: on_fail + - store_artifacts: # on fail, store Test Logs for diagnosing the issue + path: t.tar.gz + destination: test_logs + when: on_fail + + install-clang-10: + steps: + - run: + name: Install Clang 10 + command: | + echo "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" | sudo tee -a /etc/apt/sources.list + echo "deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" | sudo tee -a /etc/apt/sources.list + echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable + sudo apt-get update -y && sudo apt-get install -y clang-10 + + install-clang-13: + steps: + - run: + name: Install Clang 13 + command: | + echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list + echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list + echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - + sudo apt-get update -y && sudo apt-get install -y clang-13 + + install-gflags: + steps: + - run: + name: Install gflags + command: | + sudo apt-get update -y && sudo apt-get install -y libgflags-dev + + install-benchmark: + steps: + - run: # currently doesn't support ubuntu-1604 which doesn't have libbenchmark package, user can still install by building it youself + name: Install benchmark + command: | + sudo apt-get update -y && sudo apt-get install -y libbenchmark-dev + + install-librados: + steps: + - run: + name: Install librados + command: | + sudo apt-get update -y && sudo apt-get install -y librados-dev + + upgrade-cmake: + steps: + - run: + name: Upgrade cmake + command: | + sudo apt remove --purge cmake + sudo snap install cmake --classic + + install-gflags-on-macos: + steps: + - run: + name: Install gflags on macos + command: | + HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags + + install-gtest-parallel: + steps: + - run: + name: Install gtest-parallel + command: | + git clone --single-branch --branch master --depth 1 https://github.com/google/gtest-parallel.git ~/gtest-parallel + echo 'export PATH=$HOME/gtest-parallel:$PATH' >> $BASH_ENV + + install-compression-libs: + steps: 
+ - run: + name: Install compression libs + command: | + sudo apt-get update -y && sudo apt-get install -y libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev + +executors: + windows-2xlarge: + machine: + image: 'windows-server-2019-vs2019:stable' + resource_class: windows.2xlarge + shell: bash.exe + +jobs: + build-macos: + macos: + xcode: 12.5.1 + resource_class: large + environment: + ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now + steps: + - increase-max-open-files-on-macos + - install-gflags-on-macos + - pre-steps-macos + - run: ulimit -S -n 1048576 && OPT=-DCIRCLECI make V=1 J=32 -j32 check 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-macos-cmake: + macos: + xcode: 12.5.1 + resource_class: large + steps: + - increase-max-open-files-on-macos + - install-cmake-on-macos + - install-gflags-on-macos + - pre-steps-macos + - run: ulimit -S -n 1048576 && (mkdir build && cd build && cmake -DWITH_GFLAGS=1 .. && make V=1 -j32 && ctest -j10) 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - run: make V=1 J=32 -j32 check 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-mem-env-librados: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-librados + - run: MEM_ENV=1 ROCKSDB_USE_LIBRADOS=1 make V=1 J=32 -j32 check 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-encrypted-env: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - run: ENCRYPTED_ENV=1 make V=1 J=32 -j32 check 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-shared_lib-alt_namespace-status_checked: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j32 check 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-release: + machine: + image: ubuntu-1604:202104-01 + resource_class: large + steps: + - checkout # check out the code in the project directory + - run: make V=1 -j8 release 2>&1 | .circleci/cat_ignore_eagain + - run: if ./db_stress --version; then false; else true; fi # ensure without gflags + - install-gflags + - run: make V=1 -j8 release 2>&1 | .circleci/cat_ignore_eagain + - run: ./db_stress --version # ensure with gflags + - post-steps + + build-linux-release-rtti: + machine: + image: ubuntu-1604:201903-01 + resource_class: large + steps: + - checkout # check out the code in the project directory + - run: make clean + - run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j8 static_lib tools db_bench 2>&1 | .circleci/cat_ignore_eagain + - run: if ./db_stress --version; then false; else true; fi # ensure without gflags + - run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev + - run: make clean + - run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j8 static_lib tools db_bench 2>&1 | .circleci/cat_ignore_eagain + - run: ./db_stress --version # ensure with gflags + + build-linux-lite: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - run: LITE=1 make V=1 J=32 -j32 check 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-lite-release: + machine: + image: ubuntu-1604:202104-01 
+ resource_class: large + steps: + - checkout # check out the code in the project directory + - run: LITE=1 make V=1 -j8 release 2>&1 | .circleci/cat_ignore_eagain + - run: if ./db_stress --version; then false; else true; fi # ensure without gflags + - install-gflags + - run: LITE=1 make V=1 -j8 release 2>&1 | .circleci/cat_ignore_eagain + - run: ./db_stress --version # ensure with gflags + - post-steps + + build-linux-clang-no_test_run: + machine: + image: ubuntu-1604:202104-01 + resource_class: xlarge + steps: + - checkout # check out the code in the project directory + - run: sudo apt-get update -y && sudo apt-get install -y clang libgflags-dev libtbb-dev + - run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-clang10-asan: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-clang-10 + - run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check 2>&1 | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out + - post-steps + + build-linux-clang10-mini-tsan: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-clang-10 + - run: COMPILE_WITH_TSAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check 2>&1 | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out. + - post-steps + + build-linux-clang10-ubsan: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-clang-10 + - run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check 2>&1 | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out + - post-steps + + build-linux-clang10-clang-analyze: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-clang-10 + - run: sudo apt-get update -y && sudo apt-get install -y clang-tools-10 + - run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze 2>&1 | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path. + - post-steps + + build-linux-cmake: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - upgrade-cmake + - run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20) 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-cmake-ubuntu-20: + machine: + image: ubuntu-2004:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-benchmark + - run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. 
&& make V=1 -j20 && ctest -j20 && make microbench) 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-unity-and-headers: + docker: # executor type + - image: gcc:latest + resource_class: large + steps: + - checkout # check out the code in the project directory + - run: apt-get update -y && apt-get install -y libgflags-dev + - run: TEST_TMPDIR=/dev/shm && make V=1 -j8 unity_test 2>&1 | .circleci/cat_ignore_eagain + - run: make V=1 -j8 -k check-headers 2>&1 | .circleci/cat_ignore_eagain # could be moved to a different build + - post-steps + + build-linux-gcc-4_8-no_test_run: + machine: + image: ubuntu-1604:202104-01 + resource_class: large + steps: + - pre-steps + - run: sudo apt-get update -y && sudo apt-get install gcc-4.8 g++-4.8 libgflags-dev + - run: CC=gcc-4.8 CXX=g++-4.8 V=1 SKIP_LINK=1 make -j8 all 2>&1 | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI + - post-steps + + build-linux-gcc-8-no_test_run: + machine: + image: ubuntu-2004:202010-01 + resource_class: large + steps: + - pre-steps + - run: sudo apt-get update -y && sudo apt-get install gcc-8 g++-8 libgflags-dev + - run: CC=gcc-8 CXX=g++-8 V=1 SKIP_LINK=1 make -j8 all 2>&1 | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI + - post-steps + + build-linux-gcc-9-no_test_run: + machine: + image: ubuntu-2004:202010-01 + resource_class: large + steps: + - pre-steps + - run: sudo apt-get update -y && sudo apt-get install gcc-9 g++-9 libgflags-dev + - run: CC=gcc-9 CXX=g++-9 V=1 SKIP_LINK=1 make -j8 all 2>&1 | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI + - post-steps + + build-linux-gcc-10-cxx20-no_test_run: + machine: + image: ubuntu-2004:202010-01 + resource_class: xlarge + steps: + - pre-steps + - run: sudo apt-get update -y && sudo apt-get install gcc-10 g++-10 libgflags-dev + - run: CC=gcc-10 CXX=g++-10 V=1 SKIP_LINK=1 ROCKSDB_CXX_STANDARD=c++20 make -j16 all 2>&1 | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI + - post-steps + + build-linux-gcc-11-no_test_run: + machine: + image: ubuntu-2004:202010-01 + resource_class: xlarge + steps: + - pre-steps + - run: sudo apt-get update -y && sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get install gcc-11 g++-11 libgflags-dev + - run: CC=gcc-11 CXX=g++-11 V=1 SKIP_LINK=1 make -j16 all 2>&1 | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI + - post-steps + + build-linux-clang-13-no_test_run: + machine: + image: ubuntu-2004:202010-01 + resource_class: xlarge + steps: + - pre-steps + - install-clang-13 + - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j16 all 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + # This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing. 
+ build-linux-microbench: + machine: + image: ubuntu-2004:202010-01 + resource_class: xlarge + steps: + - pre-steps + - install-benchmark + - run: DEBUG_LEVEL=0 make microbench 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-windows: + executor: windows-2xlarge + parameters: + extra_cmake_opt: + default: "" + type: string + vs_year: + default: "2019" + type: string + cmake_generator: + default: "Visual Studio 16 2019" + type: string + environment: + THIRDPARTY_HOME: C:/Users/circleci/thirdparty + CMAKE_HOME: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64 + CMAKE_BIN: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64/bin/cmake.exe + SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.7 + SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.7;C:/Users/circleci/thirdparty/snappy-1.1.7/build + SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.7/build/Debug/snappy.lib + VS_YEAR: <> + CMAKE_GENERATOR: <> + steps: + - checkout + - run: + name: "Setup VS" + command: | + if [[ "${VS_YEAR}" == "2019" ]]; then + echo "VS2019 already present." + elif [[ "${VS_YEAR}" == "2017" ]]; then + echo "Installing VS2017..." + powershell .circleci/vs2017_install.ps1 + elif [[ "${VS_YEAR}" == "2015" ]]; then + echo "Installing VS2015..." + powershell .circleci/vs2015_install.ps1 + fi + - store_artifacts: + path: \Users\circleci\AppData\Local\Temp\vslogs.zip + - run: + name: "Install thirdparty dependencies" + command: | + mkdir ${THIRDPARTY_HOME} + cd ${THIRDPARTY_HOME} + echo "Installing CMake..." + curl --fail --silent --show-error --output cmake-3.16.4-win64-x64.zip --location https://github.com/Kitware/CMake/releases/download/v3.16.4/cmake-3.16.4-win64-x64.zip + unzip -q cmake-3.16.4-win64-x64.zip + echo "Building Snappy dependency..." + curl --fail --silent --show-error --output snappy-1.1.7.zip --location https://github.com/google/snappy/archive/1.1.7.zip + unzip -q snappy-1.1.7.zip + cd snappy-1.1.7 + mkdir build + cd build + ${CMAKE_BIN} -G "${CMAKE_GENERATOR}" .. + msbuild.exe Snappy.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64 + - run: + name: "Build RocksDB" + command: | + mkdir build + cd build + ${CMAKE_BIN} -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 << parameters.extra_cmake_opt >> .. + cd .. 
+ echo "Building with VS version: ${CMAKE_GENERATOR}" + msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64 + - run: + name: "Test RocksDB" + shell: powershell.exe + command: | + build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16 + + build-linux-java: + machine: + image: ubuntu-1604:202104-01 + resource_class: large + environment: + JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64 + steps: + - pre-steps + - install-gflags + - run: + name: "Set Java Environment" + command: | + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV + which java && java -version + which javac && javac -version + - run: + name: "Build RocksDBJava Shared Library" + command: make V=1 J=8 -j8 rocksdbjava 2>&1 | .circleci/cat_ignore_eagain + - run: + name: "Test RocksDBJava" + command: make V=1 J=8 -j8 jtest 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-java-static: + machine: + image: ubuntu-1604:202104-01 + resource_class: large + environment: + JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64 + steps: + - pre-steps + - install-gflags + - run: + name: "Set Java Environment" + command: | + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV + which java && java -version + which javac && javac -version + - run: + name: "Build RocksDBJava Static Library" + command: make V=1 J=8 -j8 rocksdbjavastatic 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-macos-java: + macos: + xcode: 12.5.1 + resource_class: medium + environment: + JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home + ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash + steps: + - increase-max-open-files-on-macos + - install-gflags-on-macos + - install-jdk8-on-macos + - pre-steps-macos + - run: + name: "Set Java Environment" + command: | + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV + which java && java -version + which javac && javac -version + - run: + name: "Build RocksDBJava Shared Library" + command: make V=1 J=8 -j8 rocksdbjava 2>&1 | .circleci/cat_ignore_eagain + - run: + name: "Test RocksDBJava" + command: make V=1 J=8 -j8 jtest 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-macos-java-static: + macos: + xcode: 12.5.1 + resource_class: medium + environment: + JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home + steps: + - increase-max-open-files-on-macos + - install-gflags-on-macos + - install-cmake-on-macos + - install-jdk8-on-macos + - pre-steps-macos + - run: + name: "Set Java Environment" + command: | + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV + which java && java -version + which javac && javac -version + - run: + name: "Build RocksDBJava x86 and ARM Static Libraries" + command: make V=1 J=8 -j8 rocksdbjavastaticosx 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-macos-java-static-universal: + macos: + xcode: 12.5.1 + resource_class: medium + environment: + JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home + steps: + - increase-max-open-files-on-macos + - install-gflags-on-macos + - install-cmake-on-macos + - install-jdk8-on-macos + - pre-steps-macos + - run: + name: "Set Java Environment" + command: | + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export 
PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV + which java && java -version + which javac && javac -version + - run: + name: "Build RocksDBJava Universal Binary Static Library" + command: make V=1 J=8 -j8 rocksdbjavastaticosx_ub 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-examples: + machine: + image: ubuntu-1604:202104-01 + resource_class: large + steps: + - pre-steps + - install-gflags + - run: + name: "Build examples" + command: | + OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4 | ../.circleci/cat_ignore_eagain + - post-steps + + build-cmake-mingw: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - run: sudo apt-get update -y && sudo apt-get install -y mingw-w64 + - run: sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix + - run: + name: "Build cmake-mingw" + command: | + sudo apt-get install snapd && sudo snap install cmake --beta --classic + export PATH=/snap/bin:$PATH + sudo apt-get install -y openjdk-8-jdk + export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + export PATH=$JAVA_HOME/bin:$PATH + echo "JAVA_HOME=${JAVA_HOME}" + which java && java -version + which javac && javac -version + mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni + - post-steps + + build-linux-non-shm: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + parameters: + start_test: + default: "" + type: string + end_test: + default: "" + type: string + steps: + - pre-steps + - install-gflags + - install-gtest-parallel + - run: + name: "Build unit tests" + command: | + echo "env: $(env)" + echo "** done env" + ROCKSDBTESTS_START=<> ROCKSDBTESTS_END=<> ROCKSDBTESTS_SUBSET_TESTS_TO_FILE=/tmp/test_list make V=1 -j32 --output-sync=target build_subset_tests + - run: + name: "Run unit tests in parallel" + command: | + sed -i 's/[[:space:]]*$//; s/ / \.\//g; s/.*/.\/&/' /tmp/test_list + cat /tmp/test_list + export TEST_TMPDIR=/tmp/rocksdb_test_tmp + gtest-parallel $(&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-arm: + machine: + image: ubuntu-2004:202101-01 + resource_class: arm.large + steps: + - pre-steps + - install-gflags + - run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some 2>&1 | .circleci/cat_ignore_eagain + - post-steps + + build-linux-arm-cmake-no_test_run: + machine: + image: ubuntu-2004:202101-01 + resource_class: arm.large + environment: + JAVA_HOME: /usr/lib/jvm/java-8-openjdk-arm64 + steps: + - pre-steps + - install-gflags + - run: + name: "Set Java Environment" + command: | + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV + which java && java -version + which javac && javac -version + - run: + name: "Build with cmake" + command: | + mkdir build + cd build + cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=1 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. + make -j4 + - run: + name: "Build Java with cmake" + command: | + rm -rf build + mkdir build + cd build + cmake -DJNI=1 -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 .. 
+ make -j4 rocksdb rocksdbjni + - post-steps + + build-format-compatible: + machine: + image: ubuntu-1604:202104-01 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-compression-libs + - run: + name: "test" + command: | + export TEST_TMPDIR=/dev/shm/rocksdb + rm -rf /dev/shm/rocksdb + mkdir /dev/shm/rocksdb + tools/check_format_compatible.sh + - post-steps + +workflows: + version: 2 + build-linux: + jobs: + - build-linux + build-linux-cmake: + jobs: + - build-linux-cmake + - build-linux-cmake-ubuntu-20 + build-linux-mem-env-librados: + jobs: + - build-linux-mem-env-librados + build-linux-encrypted-env: + jobs: + - build-linux-encrypted-env + build-linux-shared_lib-alt_namespace-status_checked: + jobs: + - build-linux-shared_lib-alt_namespace-status_checked + build-linux-lite: + jobs: + - build-linux-lite + build-linux-release: + jobs: + - build-linux-release + build-linux-release-rtti: + jobs: + - build-linux-release-rtti + build-linux-lite-release: + jobs: + - build-linux-lite-release + build-linux-clang10-asan: + jobs: + - build-linux-clang10-asan + build-linux-clang10-mini-tsan: + jobs: + - build-linux-clang10-mini-tsan + build-linux-clang10-ubsan: + jobs: + - build-linux-clang10-ubsan + build-linux-clang10-clang-analyze: + jobs: + - build-linux-clang10-clang-analyze + build-linux-unity-and-headers: + jobs: + - build-linux-unity-and-headers + build-windows-vs2019: + jobs: + - build-windows: + name: "build-windows-vs2019" + build-windows-vs2019-cxx20: + jobs: + - build-windows: + name: "build-windows-vs2019-cxx20" + extra_cmake_opt: -DCMAKE_CXX_STANDARD=20 + build-windows-vs2017: + jobs: + - build-windows: + name: "build-windows-vs2017" + vs_year: "2017" + cmake_generator: "Visual Studio 15 Win64" + build-java: + jobs: + - build-linux-java + - build-linux-java-static + - build-macos-java + - build-macos-java-static + - build-macos-java-static-universal + build-examples: + jobs: + - build-examples + build-linux-non-shm: + jobs: + - build-linux-non-shm: + start_test: "" + end_test: "db_options_test" # make sure unique in src.mk + - build-linux-non-shm: + start_test: "db_options_test" # make sure unique in src.mk + end_test: "filename_test" # make sure unique in src.mk + - build-linux-non-shm: + start_test: "filename_test" # make sure unique in src.mk + end_test: "statistics_test" # make sure unique in src.mk + - build-linux-non-shm: + start_test: "statistics_test" # make sure unique in src.mk + end_test: "" + build-linux-compilers-no_test_run: + jobs: + - build-linux-clang-no_test_run + - build-linux-clang-13-no_test_run + - build-linux-gcc-4_8-no_test_run + - build-linux-gcc-8-no_test_run + - build-linux-gcc-9-no_test_run + - build-linux-gcc-10-cxx20-no_test_run + - build-linux-gcc-11-no_test_run + - build-linux-arm-cmake-no_test_run + build-macos: + jobs: + - build-macos + build-macos-cmake: + jobs: + - build-macos-cmake + build-cmake-mingw: + jobs: + - build-cmake-mingw + build-linux-arm: + jobs: + - build-linux-arm + build-microbench: + jobs: + - build-linux-microbench + nightly: + triggers: + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - main + jobs: + - build-format-compatible + - build-linux-arm-test-full diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/ubsan_suppression_list.txt mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/ubsan_suppression_list.txt --- mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/ubsan_suppression_list.txt 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/ubsan_suppression_list.txt 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,6 @@ +# Supress UBSAN warnings related to stl_tree.h, e.g. +# UndefinedBehaviorSanitizer: undefined-behavior /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43 in +# /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43: +# runtime error: upcast of address 0x000001fa8820 with insufficient space for an object of type +# 'std::_Rb_tree_node, rocksdb::(anonymous namespace)::LockHoldingInfo> >' +src:*bits/stl_tree.h diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/vs2015_install.ps1 mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/vs2015_install.ps1 --- mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/vs2015_install.ps1 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/vs2015_install.ps1 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,24 @@ +$VS_DOWNLOAD_LINK = "https://go.microsoft.com/fwlink/?LinkId=691126" +$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe" +curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe +if ($LASTEXITCODE -ne 0) { + echo "Download of the VS 2015 installer failed" + exit 1 +} +$VS_INSTALL_ARGS = @("/Quiet", "/NoRestart") +$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru +Remove-Item -Path vs_installer.exe -Force +$exitCode = $process.ExitCode +if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "VS 2015 installer exited with code $exitCode, which should be one of [0, 3010]." + curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe + if ($LASTEXITCODE -ne 0) { + echo "Download of the VS Collect tool failed." + exit 1 + } + Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru + New-Item -Path "C:\w\build-results" -ItemType "directory" -Force + Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\" + exit 1 +} +echo "VS 2015 installed." 
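Nearly every CI command in the config.yml above pipes through .circleci/cat_ignore_eagain, whose embedded C program retries reads and writes that fail with EAGAIN instead of letting a parallel make die with "write error: stdout". The core of that retry pattern, reduced to a hedged sketch of the write side (the helper name, message and pause length are arbitrary; only the EAGAIN handling and exit code mirror the original):

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Write all of buf[0..n), pausing briefly and retrying on EAGAIN,
       as cat_ignore_eagain does for a non-blocking stdout. */
    static int write_all(int fd, const char *buf, ssize_t n)
    {
      ssize_t m= 0;
      while (m < n)
      {
        ssize_t p= write(fd, buf + m, n - m);
        if (p < 0)
        {
          if (errno == EAGAIN)
            usleep(100);              /* transient: pause and retry */
          else
          {
            perror("write failed");
            return -1;                /* real error: give up */
          }
        }
        else
          m+= p;
      }
      return 0;
    }

    int main(void)
    {
      const char msg[]= "hello\n";
      /* 42 is the exit code the original uses for a failed write. */
      return write_all(STDOUT_FILENO, msg, (ssize_t) (sizeof(msg) - 1)) ? 42 : 0;
    }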
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/vs2017_install.ps1 mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/vs2017_install.ps1 --- mariadb-10.11.11/storage/rocksdb/rocksdb/.circleci/vs2017_install.ps1 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/.circleci/vs2017_install.ps1 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,35 @@ +$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe" +$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe" +$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", + "--add Microsoft.VisualStudio.Component.VC.Tools.14.13", + "--add Microsoft.Component.MSBuild", + "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", + "--add Microsoft.VisualStudio.Component.TextTemplating", + "--add Microsoft.VisualStudio.Component.VC.CoreIde", + "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core", + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81") + +curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe +if ($LASTEXITCODE -ne 0) { + echo "Download of the VS 2017 installer failed" + exit 1 +} + +$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru +Remove-Item -Path vs_installer.exe -Force +$exitCode = $process.ExitCode +if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]." + curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe + if ($LASTEXITCODE -ne 0) { + echo "Download of the VS Collect tool failed." + exit 1 + } + Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru + New-Item -Path "C:\w\build-results" -ItemType "directory" -Force + Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\" + exit 1 +} +echo "VS 2017 installed." 
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.github/workflows/sanity_check.yml mariadb-10.11.13/storage/rocksdb/rocksdb/.github/workflows/sanity_check.yml --- mariadb-10.11.11/storage/rocksdb/rocksdb/.github/workflows/sanity_check.yml 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/.github/workflows/sanity_check.yml 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,44 @@ +name: Check buck targets and code format +on: [push, pull_request] +jobs: + check: + name: Check TARGETS file and code format + runs-on: ubuntu-latest + steps: + - name: Checkout feature branch + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Fetch from upstream + run: | + git remote add upstream https://github.com/facebook/rocksdb.git && git fetch upstream + + - name: Where am I + run: | + echo git status && git status + echo "git remote -v" && git remote -v + echo git branch && git branch + + - name: Setup Python + uses: actions/setup-python@v1 + + - name: Install Dependencies + run: python -m pip install --upgrade pip + + - name: Install argparse + run: pip install argparse + + - name: Download clang-format-diff.py + uses: wei/wget@v1 + with: + args: https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py + + - name: Check format + run: VERBOSE_CHECK=1 make check-format + + - name: Compare buckify output + run: make check-buck-targets + + - name: Simple source code checks + run: make check-sources diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.gitignore mariadb-10.11.13/storage/rocksdb/rocksdb/.gitignore --- mariadb-10.11.11/storage/rocksdb/rocksdb/.gitignore 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/.gitignore 2025-05-19 16:14:27.000000000 +0000 @@ -1,4 +1,5 @@ make_config.mk +rocksdb.pc *.a *.arc @@ -7,6 +8,7 @@ *.gcda *.gcno *.o +*.o.tmp *.so *.so.* *_test @@ -34,6 +36,7 @@ sst_dump blob_dump block_cache_trace_analyzer +db_with_timestamp_basic_test tools/block_cache_analyzer/*.pyc column_aware_encoding_exp util/build_version.cc @@ -51,6 +54,7 @@ trace_analyzer trace_analyzer_test block_cache_trace_analyzer +io_tracer_parser .DS_Store .vs .vscode @@ -82,3 +86,12 @@ fbcode buckifier/*.pyc buckifier/__pycache__ + +compile_commands.json +clang-format-diff.py +.py3/ + +fuzz/proto/gen/ +fuzz/crash-* + +cmake-build-* diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/.travis.yml mariadb-10.11.13/storage/rocksdb/rocksdb/.travis.yml --- mariadb-10.11.11/storage/rocksdb/rocksdb/.travis.yml 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/.travis.yml 2025-05-19 16:14:27.000000000 +0000 @@ -2,18 +2,19 @@ language: cpp os: - linux - - osx +arch: + - arm64 + - ppc64le + - s390x compiler: - clang - gcc -osx_image: xcode9.4 -jdk: - - openjdk7 cache: - ccache addons: apt: + update: true sources: - ubuntu-toolchain-r-test packages: @@ -24,15 +25,6 @@ - liblzma-dev # xv - libzstd-dev - zlib1g-dev - homebrew: - update: true - packages: - - ccache - - gflags - - lz4 - - snappy - - xz - - zstd env: - TEST_GROUP=platform_dependent # 16-18 minutes @@ -48,43 +40,209 @@ - JOB_NAME=examples # 5-7 minutes - JOB_NAME=cmake # 3-5 minutes - JOB_NAME=cmake-gcc8 # 3-5 minutes + - JOB_NAME=cmake-gcc9 # 3-5 minutes + - JOB_NAME=cmake-gcc9-c++20 # 3-5 minutes - JOB_NAME=cmake-mingw # 3 minutes + - JOB_NAME=make-gcc4.8 + - JOB_NAME=status_checked matrix: exclude: - - os: osx + - os : linux + arch: arm64 + env: JOB_NAME=cmake-mingw + - os : linux + arch: arm64 + env: 
JOB_NAME=make-gcc4.8 + - os: linux + arch: ppc64le + env: JOB_NAME=cmake-mingw + - os: linux + arch: ppc64le + env: JOB_NAME=make-gcc4.8 + - os: linux + arch: s390x + env: JOB_NAME=cmake-mingw + - os: linux + arch: s390x + env: JOB_NAME=make-gcc4.8 + - os: linux + compiler: clang + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: arm64 + env: TEST_GROUP=platform_dependent + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: TEST_GROUP=1 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le + env: TEST_GROUP=1 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x env: TEST_GROUP=1 - - os: osx + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 env: TEST_GROUP=2 - - os: osx + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le + env: TEST_GROUP=2 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: TEST_GROUP=2 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: TEST_GROUP=3 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le env: TEST_GROUP=3 - - os: osx + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: TEST_GROUP=3 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: TEST_GROUP=4 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le env: TEST_GROUP=4 - - os: osx + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: TEST_GROUP=4 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: JOB_NAME=cmake + - if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/ + os : linux + arch: arm64 + env: JOB_NAME=java_test + - if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/ + os: linux + arch: ppc64le + env: JOB_NAME=java_test + - if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/ + os: linux + arch: s390x + env: JOB_NAME=java_test + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: JOB_NAME=lite_build + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le + env: JOB_NAME=lite_build + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: JOB_NAME=lite_build + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: JOB_NAME=examples + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le + env: JOB_NAME=examples + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: JOB_NAME=examples + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 env: JOB_NAME=cmake-gcc8 - - os : osx - env: JOB_NAME=cmake-mingw - - os : linux - compiler: clang - - os : osx - compiler: gcc + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le + env: JOB_NAME=cmake-gcc8 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: JOB_NAME=cmake-gcc8 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: JOB_NAME=cmake-gcc9 + - if: type = pull_request AND commit_message !~ 
/FULL_CI/ + os: linux + arch: ppc64le + env: JOB_NAME=cmake-gcc9 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: JOB_NAME=cmake-gcc9 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: JOB_NAME=cmake-gcc9-c++20 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le + env: JOB_NAME=cmake-gcc9-c++20 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: JOB_NAME=cmake-gcc9-c++20 + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os : linux + arch: arm64 + env: JOB_NAME=status_checked + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: ppc64le + env: JOB_NAME=status_checked + - if: type = pull_request AND commit_message !~ /FULL_CI/ + os: linux + arch: s390x + env: JOB_NAME=status_checked install: - - if [ "${TRAVIS_OS_NAME}" == osx ]; then - PATH=$PATH:/usr/local/opt/ccache/libexec; - fi - if [ "${JOB_NAME}" == cmake-gcc8 ]; then - sudo apt-get install -y g++-8; + sudo apt-get install -y g++-8 || exit $?; CC=gcc-8 && CXX=g++-8; fi + - if [ "${JOB_NAME}" == cmake-gcc9 ] || [ "${JOB_NAME}" == cmake-gcc9-c++20 ]; then + sudo apt-get install -y g++-9 || exit $?; + CC=gcc-9 && CXX=g++-9; + fi - if [ "${JOB_NAME}" == cmake-mingw ]; then - sudo apt-get install -y mingw-w64 ; + sudo apt-get install -y mingw-w64 || exit $?; + fi + - if [ "${JOB_NAME}" == make-gcc4.8 ]; then + sudo apt-get install -y g++-4.8 || exit $?; + CC=gcc-4.8 && CXX=g++-4.8; fi - - if [[ "${JOB_NAME}" == cmake* ]] && [ "${TRAVIS_OS_NAME}" == linux ]; then - mkdir cmake-dist && curl --silent --fail --show-error --location https://github.com/Kitware/CMake/releases/download/v3.14.5/cmake-3.14.5-Linux-x86_64.tar.gz | tar --strip-components=1 -C cmake-dist -xz && export PATH=$PWD/cmake-dist/bin:$PATH; + - | + if [[ "${JOB_NAME}" == cmake* ]]; then + sudo apt-get remove -y cmake cmake-data + export CMAKE_DEB="cmake-3.14.5-Linux-$(uname -m).deb" + export CMAKE_DEB_URL="https://rocksdb-deps.s3-us-west-2.amazonaws.com/cmake/${CMAKE_DEB}" + curl --silent --fail --show-error --location --output "${CMAKE_DEB}" "${CMAKE_DEB_URL}" || exit $? + sudo dpkg -i "${CMAKE_DEB}" || exit $? + which cmake && cmake --version fi - - if [[ "${JOB_NAME}" == java_test ]]; then - java -version && echo "JAVA_HOME=${JAVA_HOME}"; + - | + if [[ "${JOB_NAME}" == java_test || "${JOB_NAME}" == cmake* ]]; then + # Ensure JDK 8 + sudo apt-get install -y openjdk-8-jdk || exit $? 
+ export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH + export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture) + echo "JAVA_HOME=${JAVA_HOME}" + which java && java -version + which javac && javac -version fi before_script: @@ -93,41 +251,53 @@ - ulimit -n 8192 script: - - ${CXX} --version + - date; ${CXX} --version - if [ `command -v ccache` ]; then ccache -C; fi - case $TEST_GROUP in platform_dependent) - OPT=-DTRAVIS V=1 ROCKSDBTESTS_END=db_block_cache_test make -j4 all_but_some_tests check_some + OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=only make -j4 all_but_some_tests check_some ;; 1) - OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=db_block_cache_test ROCKSDBTESTS_END=db_iter_test make -j4 check_some + OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_END=backupable_db_test make -j4 check_some ;; 2) - OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" V=1 make -j4 tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" V=1 ROCKSDBTESTS_START=db_iter_test ROCKSDBTESTS_END=options_file_test make -j4 check_some + OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 make -j4 tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=backupable_db_test ROCKSDBTESTS_END=db_universal_compaction_test make -j4 check_some ;; 3) - OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=options_file_test ROCKSDBTESTS_END=write_prepared_transaction_test make -j4 check_some + OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=db_universal_compaction_test ROCKSDBTESTS_END=table_properties_collector_test make -j4 check_some ;; 4) - OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=write_prepared_transaction_test make -j4 check_some + OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=table_properties_collector_test make -j4 check_some ;; esac - case $JOB_NAME in java_test) - OPT=-DTRAVIS V=1 make rocksdbjava jtest + OPT=-DTRAVIS LIB_MODE=shared V=1 make rocksdbjava jtest ;; lite_build) - OPT='-DTRAVIS -DROCKSDB_LITE' V=1 make -j4 static_lib tools + OPT='-DTRAVIS -DROCKSDB_LITE' LIB_MODE=shared V=1 make -j4 all ;; examples) - OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4 + OPT=-DTRAVIS LIB_MODE=shared V=1 make -j4 static_lib && cd examples && make -j4 ;; cmake-mingw) sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni ;; cmake*) - mkdir build && cd build && cmake -DJNI=1 .. -DCMAKE_BUILD_TYPE=Release && make -j4 rocksdb rocksdbjni + case $JOB_NAME in + *-c++20) + OPT=-DCMAKE_CXX_STANDARD=20 + ;; + esac + + mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=0 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. && make -j4 && cd .. && rm -rf build && mkdir build && cd build && cmake -DJNI=1 .. 
-DCMAKE_BUILD_TYPE=Release $OPT && make -j4 rocksdb rocksdbjni + ;; + make-gcc4.8) + OPT=-DTRAVIS LIB_MODE=shared V=1 SKIP_LINK=1 make -j4 all && [ "Linking broken because libgflags compiled with newer ABI" ] + ;; + status_checked) + OPT=-DTRAVIS LIB_MODE=shared V=1 ASSERT_STATUS_CHECKED=1 make -j4 check_some + ;; esac notifications: diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/CMakeLists.txt mariadb-10.11.13/storage/rocksdb/rocksdb/CMakeLists.txt --- mariadb-10.11.11/storage/rocksdb/rocksdb/CMakeLists.txt 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/CMakeLists.txt 2025-05-19 16:14:27.000000000 +0000 @@ -32,10 +32,11 @@ # 3. cmake .. # 4. make -j -cmake_minimum_required(VERSION 3.5.1) +cmake_minimum_required(VERSION 3.10) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules/") include(ReadVersion) +include(GoogleTest) get_rocksdb_version(rocksdb_VERSION) project(rocksdb VERSION ${rocksdb_VERSION} @@ -62,6 +63,7 @@ endif(CCACHE_FOUND) option(WITH_JEMALLOC "build with JeMalloc" OFF) +option(WITH_LIBURING "build with liburing" ON) option(WITH_SNAPPY "build with SNAPPY" OFF) option(WITH_LZ4 "build with lz4" OFF) option(WITH_ZLIB "build with zlib" OFF) @@ -70,6 +72,12 @@ if (WITH_WINDOWS_UTF8_FILENAMES) add_definitions(-DROCKSDB_WINDOWS_UTF8_FILENAMES) endif() + +if ($ENV{CIRCLECI}) + message(STATUS "Build for CircleCI env, a few tests may be disabled") + add_definitions(-DCIRCLECI) +endif() + # third-party/folly is only validated to work on Linux and Windows for now. # So only turn it on there by default. if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") @@ -83,15 +91,18 @@ option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) endif() +if( NOT DEFINED CMAKE_CXX_STANDARD ) + set(CMAKE_CXX_STANDARD 11) +endif() + include(CMakeDependentOption) -CMAKE_DEPENDENT_OPTION(WITH_GFLAGS "build with GFlags" ON - "NOT MSVC;NOT MINGW" OFF) if(MSVC) + option(WITH_GFLAGS "build with GFlags" OFF) option(WITH_XPRESS "build with windows built in compression" OFF) include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc) else() - if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") # FreeBSD has jemalloc as default malloc # but it does not have all the jemalloc files in include/... set(WITH_JEMALLOC ON) @@ -103,18 +114,40 @@ endif() endif() - # No config file for this + if(MINGW) + option(WITH_GFLAGS "build with GFlags" OFF) + else() + option(WITH_GFLAGS "build with GFlags" ON) + endif() + set(GFLAGS_LIB) if(WITH_GFLAGS) - find_package(gflags REQUIRED) + # Config with namespace available since gflags 2.2.2 + option(GFLAGS_USE_TARGET_NAMESPACE "Use gflags import target with namespace." 
ON) + find_package(gflags CONFIG) + if(gflags_FOUND) + if(TARGET ${GFLAGS_TARGET}) + # Config with GFLAGS_TARGET available since gflags 2.2.0 + set(GFLAGS_LIB ${GFLAGS_TARGET}) + else() + # Config with GFLAGS_LIBRARIES available since gflags 2.1.0 + set(GFLAGS_LIB ${gflags_LIBRARIES}) + endif() + else() + find_package(gflags REQUIRED) + set(GFLAGS_LIB gflags::gflags) + endif() + include_directories(${GFLAGS_INCLUDE_DIR}) + list(APPEND THIRDPARTY_LIBS ${GFLAGS_LIB}) add_definitions(-DGFLAGS=1) - include_directories(${gflags_INCLUDE_DIR}) - list(APPEND THIRDPARTY_LIBS gflags::gflags) endif() if(WITH_SNAPPY) - find_package(snappy REQUIRED) + find_package(Snappy CONFIG) + if(NOT Snappy_FOUND) + find_package(Snappy REQUIRED) + endif() add_definitions(-DSNAPPY) - list(APPEND THIRDPARTY_LIBS snappy::snappy) + list(APPEND THIRDPARTY_LIBS Snappy::snappy) endif() if(WITH_ZLIB) @@ -149,23 +182,25 @@ endif() endif() -string(TIMESTAMP TS "%Y/%m/%d %H:%M:%S" UTC) -set(GIT_DATE_TIME "${TS}" CACHE STRING "the time we first built rocksdb") +string(TIMESTAMP TS "%Y-%m-%d %H:%M:%S" UTC) +set(BUILD_DATE "${TS}" CACHE STRING "the time we first built rocksdb") find_package(Git) if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") - if(WIN32) - execute_process(COMMAND $ENV{COMSPEC} /C ${GIT_EXECUTABLE} -C ${CMAKE_CURRENT_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA) - else() - execute_process(COMMAND ${GIT_EXECUTABLE} -C ${CMAKE_CURRENT_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA) + execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_SHA COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD ) + execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE GIT_MOD COMMAND "${GIT_EXECUTABLE}" diff-index HEAD --quiet) + execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_DATE COMMAND "${GIT_EXECUTABLE}" log -1 --date=format:"%Y-%m-%d %T" --format="%ad") + execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG RESULT_VARIABLE rv COMMAND "${GIT_EXECUTABLE}" symbolic-ref -q --short HEAD OUTPUT_STRIP_TRAILING_WHITESPACE) + if (rv AND NOT rv EQUAL 0) + execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG COMMAND "${GIT_EXECUTABLE}" describe --tags --exact-match OUTPUT_STRIP_TRAILING_WHITESPACE) endif() else() set(GIT_SHA 0) + set(GIT_MOD 1) endif() - -string(REGEX REPLACE "[^0-9a-f]+" "" GIT_SHA "${GIT_SHA}") - +string(REGEX REPLACE "[^0-9a-fA-F]+" "" GIT_SHA "${GIT_SHA}") +string(REGEX REPLACE "[^0-9: /-]+" "" GIT_DATE "${GIT_DATE}") option(WITH_MD_LIBRARY "build with MD" ON) if(WIN32 AND MSVC) @@ -178,20 +213,20 @@ set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc) configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY) -add_library(build_version OBJECT ${BUILD_VERSION_CC}) -target_include_directories(build_version PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/util) + if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W4 /wd4127 /wd4800 /wd4996 /wd4351 /wd4100 /wd4204 /wd4324") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wextra -Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wextra -Wall -pthread") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wsign-compare -Wshadow -Wno-unused-parameter -Wno-unused-variable -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers -Wno-strict-aliasing") 
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wstrict-prototypes") + endif() if(MINGW) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format -fno-asynchronous-unwind-tables") add_definitions(-D_POSIX_C_SOURCE=1) endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") include(CheckCXXCompilerFlag) @@ -203,49 +238,91 @@ endif() include(CheckCCompilerFlag) -if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") + CHECK_C_COMPILER_FLAG("-mcpu=power9" HAS_POWER9) + if(HAS_POWER9) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power9 -mtune=power9") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power9 -mtune=power9") + else() + CHECK_C_COMPILER_FLAG("-mcpu=power8" HAS_POWER8) + if(HAS_POWER8) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8 -mtune=power8") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8 -mtune=power8") + endif(HAS_POWER8) + endif(HAS_POWER9) CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC) if(HAS_ALTIVEC) message(STATUS " HAS_ALTIVEC yes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8") endif(HAS_ALTIVEC) -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le") +endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") -if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") CHECK_C_COMPILER_FLAG("-march=armv8-a+crc+crypto" HAS_ARMV8_CRC) if(HAS_ARMV8_CRC) message(STATUS " HAS_ARMV8_CRC yes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") endif(HAS_ARMV8_CRC) -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") +endif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") + +if(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") + CHECK_C_COMPILER_FLAG("-march=native" HAS_S390X_MARCH_NATIVE) + if(HAS_S390X_MARCH_NATIVE) + message(STATUS " HAS_S390X_MARCH_NATIVE yes") + endif(HAS_S390X_MARCH_NATIVE) +endif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x") option(PORTABLE "build a portable binary" OFF) option(FORCE_SSE42 "force building with SSE4.2, even when PORTABLE=ON" OFF) +option(FORCE_AVX "force building with AVX, even when PORTABLE=ON" OFF) +option(FORCE_AVX2 "force building with AVX2, even when PORTABLE=ON" OFF) if(PORTABLE) # MSVC does not need a separate compiler flag to enable SSE4.2; if nmmintrin.h # is available, it is available by default. if(FORCE_SSE42 AND NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mpclmul") endif() + if(MSVC) + if(FORCE_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") + endif() + # MSVC automatically enables BMI / lzcnt with AVX2. 
+ if(FORCE_AVX2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") + endif() + else() + if(FORCE_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + endif() + if(FORCE_AVX2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mlzcnt") + endif() + if(CMAKE_SYSTEM_PROCESSOR MATCHES "^s390x") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z196") + endif() + endif() else() if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") else() - if(NOT HAVE_POWER8 AND NOT HAS_ARMV8_CRC) + if(CMAKE_SYSTEM_PROCESSOR MATCHES "^s390x" AND NOT HAS_S390X_MARCH_NATIVE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z196") + elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64" AND NOT HAS_ARMV8_CRC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() endif() endif() include(CheckCXXSourceCompiles) +set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) if(NOT MSVC) set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul") endif() -CHECK_CXX_SOURCE_COMPILES(" + +if (NOT PORTABLE OR FORCE_SSE42) + CHECK_CXX_SOURCE_COMPILES(" #include <cstdint> #include <nmmintrin.h> #include <wmmintrin.h> @@ -257,26 +334,66 @@ auto d = _mm_cvtsi128_si64(c); } " HAVE_SSE42) -unset(CMAKE_REQUIRED_FLAGS) -if(HAVE_SSE42) - add_definitions(-DHAVE_SSE42) - add_definitions(-DHAVE_PCLMUL) -elseif(FORCE_SSE42) - message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled") + if(HAVE_SSE42) + add_definitions(-DHAVE_SSE42) + add_definitions(-DHAVE_PCLMUL) + elseif(FORCE_SSE42) + message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled") + endif() +endif() + +# Check if -latomic is required or not +if (NOT MSVC) + set(CMAKE_REQUIRED_FLAGS "--std=c++11") + CHECK_CXX_SOURCE_COMPILES(" +#include <atomic> +std::atomic<uint64_t> x(0); +int main() { + uint64_t i = x.load(std::memory_order_relaxed); + bool b = x.is_lock_free(); + return 0; +} +" BUILTIN_ATOMIC) + if (NOT BUILTIN_ATOMIC) + #TODO: Check if -latomic exists + list(APPEND THIRDPARTY_LIBS atomic) + endif() endif() +if (WITH_LIBURING) + find_package(uring) + if (uring_FOUND) + add_definitions(-DROCKSDB_IOURING_PRESENT) + list(APPEND THIRDPARTY_LIBS uring::uring) + endif() +endif() + +# Reset the required flags +set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + CHECK_CXX_SOURCE_COMPILES(" #if defined(_MSC_VER) && !defined(__thread) #define __thread __declspec(thread) #endif int main() { static __thread int tls; + (void)tls; } " HAVE_THREAD_LOCAL) if(HAVE_THREAD_LOCAL) add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) endif() +option(WITH_IOSTATS_CONTEXT "Enable IO stats context" ON) +if (NOT WITH_IOSTATS_CONTEXT) + add_definitions(-DNIOSTATS_CONTEXT) +endif() + +option(WITH_PERF_CONTEXT "Enable perf context" ON) +if (NOT WITH_PERF_CONTEXT) + add_definitions(-DNPERF_CONTEXT) +endif() + option(FAIL_ON_WARNINGS "Treat compile warnings as errors" ON) if(FAIL_ON_WARNINGS) if(MSVC) @@ -343,6 +460,12 @@ add_definitions(-DROCKSDB_NO_DYNAMIC_EXTENSION) endif() +option(ASSERT_STATUS_CHECKED "build with assert status checked" OFF) +if (ASSERT_STATUS_CHECKED) + message(STATUS "Build with assert status checked") + add_definitions(-DROCKSDB_ASSERT_STATUS_CHECKED) +endif() + if(DEFINED USE_RTTI) if(USE_RTTI) message(STATUS "Enabling RTTI") @@ -377,7 +500,15 @@ message(STATUS "Debug optimization is enabled") set(CMAKE_CXX_FLAGS_DEBUG "/Oxt") else() - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Od /RTC1 /Gm") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Od /RTC1") + + # Minimal Build is deprecated after MSVC 2015 + if( MSVC_VERSION GREATER 1900 ) + 
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Gm-") + else() + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Gm") + endif() + endif() if(WITH_RUNTIME_DEBUG) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /${RUNTIME_LIBRARY}d") @@ -404,15 +535,12 @@ add_definitions(-fno-builtin-memcmp -DCYGWIN) elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) - if(CMAKE_SYSTEM_PROCESSOR MATCHES arm) - add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE) - # no debug info for IOS, that will make our library big - add_definitions(-DNDEBUG) - endif() elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") add_definitions(-DOS_SOLARIS) +elseif(CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") + add_definitions(-DOS_GNU_KFREEBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") add_definitions(-DOS_FREEBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD") @@ -471,7 +599,11 @@ endif() include(CheckCXXSymbolExists) -check_cxx_symbol_exists(malloc_usable_size malloc.h HAVE_MALLOC_USABLE_SIZE) +if(CMAKE_SYSTEM_NAME MATCHES "^FreeBSD") + check_cxx_symbol_exists(malloc_usable_size malloc_np.h HAVE_MALLOC_USABLE_SIZE) +else() + check_cxx_symbol_exists(malloc_usable_size malloc.h HAVE_MALLOC_USABLE_SIZE) +endif() if(HAVE_MALLOC_USABLE_SIZE) add_definitions(-DROCKSDB_MALLOC_USABLE_SIZE) endif() @@ -481,9 +613,18 @@ add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT) endif() +check_cxx_symbol_exists(getauxval auvx.h HAVE_AUXV_GETAUXVAL) +if(HAVE_AUXV_GETAUXVAL) + add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT) +endif() + +check_cxx_symbol_exists(F_FULLFSYNC "fcntl.h" HAVE_FULLFSYNC) +if(HAVE_FULLFSYNC) + add_definitions(-DHAVE_FULLFSYNC) +endif() + include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR}/include) -include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third-party/gtest-1.8.1/fused-src) if(WITH_FOLLY_DISTRIBUTED_MUTEX) include_directories(${PROJECT_SOURCE_DIR}/third-party/folly) endif() @@ -492,14 +633,29 @@ # Main library source code set(SOURCES + cache/cache.cc + cache/cache_entry_roles.cc + cache/cache_key.cc + cache/cache_reservation_manager.cc cache/clock_cache.cc cache/lru_cache.cc cache/sharded_cache.cc db/arena_wrapped_db_iter.cc + db/blob/blob_fetcher.cc + db/blob/blob_file_addition.cc + db/blob/blob_file_builder.cc + db/blob/blob_file_cache.cc + db/blob/blob_file_garbage.cc + db/blob/blob_file_meta.cc + db/blob/blob_file_reader.cc + db/blob/blob_garbage_meter.cc + db/blob/blob_log_format.cc + db/blob/blob_log_sequential_reader.cc + db/blob/blob_log_writer.cc + db/blob/prefetch_buffer_collection.cc db/builder.cc db/c.cc db/column_family.cc - db/compacted_db_impl.cc db/compaction/compaction.cc db/compaction/compaction_iterator.cc db/compaction/compaction_picker.cc @@ -507,8 +663,10 @@ db/compaction/compaction_picker_fifo.cc db/compaction/compaction_picker_level.cc db/compaction/compaction_picker_universal.cc + db/compaction/sst_partitioner.cc db/convenience.cc db/db_filesnapshot.cc + db/db_impl/compacted_db_impl.cc db/db_impl/db_impl.cc db/db_impl/db_impl_write.cc db/db_impl/db_impl_compaction_flush.cc @@ -539,6 +697,8 @@ db/memtable_list.cc db/merge_helper.cc db/merge_operator.cc + db/output_validator.cc + db/periodic_work_scheduler.cc db/range_del_aggregator.cc db/range_tombstone_fragmenter.cc db/repair.cc @@ -549,22 +709,29 @@ db/trim_history_scheduler.cc db/version_builder.cc db/version_edit.cc + db/version_edit_handler.cc db/version_set.cc + db/wal_edit.cc db/wal_manager.cc db/write_batch.cc 
db/write_batch_base.cc db/write_controller.cc db/write_thread.cc + env/composite_env.cc env/env.cc env/env_chroot.cc env/env_encryption.cc env/env_hdfs.cc env/file_system.cc + env/file_system_tracer.cc + env/fs_remap.cc env/mock_env.cc + env/unique_id_gen.cc file/delete_scheduler.cc file/file_prefetch_buffer.cc file/file_util.cc file/filename.cc + file/line_file_reader.cc file/random_access_file_reader.cc file/read_write_util.cc file/readahead_raf.cc @@ -577,6 +744,8 @@ memory/arena.cc memory/concurrent_arena.cc memory/jemalloc_nodump_allocator.cc + memory/memkind_kmem_allocator.cc + memory/memory_allocator.cc memtable/alloc_tracker.cc memtable/hash_linklist_rep.cc memtable/hash_skiplist_rep.cc @@ -597,19 +766,23 @@ monitoring/thread_status_util.cc monitoring/thread_status_util_debug.cc options/cf_options.cc + options/configurable.cc + options/customizable.cc options/db_options.cc options/options.cc options/options_helper.cc options/options_parser.cc - options/options_sanity_check.cc port/stack_trace.cc table/adaptive/adaptive_table_factory.cc + table/block_based/binary_search_index_reader.cc table/block_based/block.cc table/block_based/block_based_filter_block.cc table/block_based/block_based_table_builder.cc table/block_based/block_based_table_factory.cc + table/block_based/block_based_table_iterator.cc table/block_based/block_based_table_reader.cc table/block_based/block_builder.cc + table/block_based/block_prefetcher.cc table/block_based/block_prefix_index.cc table/block_based/data_block_hash_index.cc table/block_based/data_block_footer.cc @@ -617,9 +790,14 @@ table/block_based/filter_policy.cc table/block_based/flush_block_policy.cc table/block_based/full_filter_block.cc + table/block_based/hash_index_reader.cc table/block_based/index_builder.cc + table/block_based/index_reader_common.cc table/block_based/parsed_full_filter_block.cc table/block_based/partitioned_filter_block.cc + table/block_based/partitioned_index_iterator.cc + table/block_based/partitioned_index_reader.cc + table/block_based/reader_common.cc table/block_based/uncompression_dict_reader.cc table/block_fetcher.cc table/cuckoo/cuckoo_table_builder.cc @@ -637,22 +815,30 @@ table/plain/plain_table_index.cc table/plain/plain_table_key_coding.cc table/plain/plain_table_reader.cc + table/sst_file_dumper.cc table/sst_file_reader.cc table/sst_file_writer.cc + table/table_factory.cc table/table_properties.cc table/two_level_iterator.cc + table/unique_id.cc test_util/sync_point.cc test_util/sync_point_impl.cc test_util/testutil.cc test_util/transaction_test_util.cc tools/block_cache_analyzer/block_cache_trace_analyzer.cc tools/dump/db_dump_tool.cc + tools/io_tracer_parser_tool.cc tools/ldb_cmd.cc tools/ldb_tool.cc tools/sst_dump_tool.cc tools/trace_analyzer_tool.cc - trace_replay/trace_replay.cc trace_replay/block_cache_tracer.cc + trace_replay/io_tracer.cc + trace_replay/trace_record_handler.cc + trace_replay/trace_record_result.cc + trace_replay/trace_record.cc + trace_replay/trace_replay.cc util/coding.cc util/compaction_job_stats_impl.cc util/comparator.cc @@ -664,6 +850,8 @@ util/murmurhash.cc util/random.cc util/rate_limiter.cc + util/ribbon_config.cc + util/regex.cc util/slice.cc util/file_checksum_helper.cc util/status.cc @@ -678,19 +866,23 @@ utilities/blob_db/blob_db_impl_filesnapshot.cc utilities/blob_db/blob_dump_tool.cc utilities/blob_db/blob_file.cc - utilities/blob_db/blob_log_reader.cc - utilities/blob_db/blob_log_writer.cc - utilities/blob_db/blob_log_format.cc + utilities/cache_dump_load.cc + 
utilities/cache_dump_load_impl.cc utilities/cassandra/cassandra_compaction_filter.cc utilities/cassandra/format.cc utilities/cassandra/merge_operator.cc utilities/checkpoint/checkpoint_impl.cc + utilities/compaction_filters.cc utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc utilities/debug.cc utilities/env_mirror.cc utilities/env_timed.cc + utilities/fault_injection_env.cc + utilities/fault_injection_fs.cc + utilities/fault_injection_secondary_cache.cc utilities/leveldb_options/leveldb_options.cc utilities/memory/memory_util.cc + utilities/merge_operators.cc utilities/merge_operators/bytesxor.cc utilities/merge_operators/max.cc utilities/merge_operators/put.cc @@ -710,6 +902,12 @@ utilities/simulator_cache/sim_cache.cc utilities/table_properties_collectors/compact_on_deletion_collector.cc utilities/trace/file_trace_reader_writer.cc + utilities/trace/replayer_impl.cc + utilities/transactions/lock/lock_manager.cc + utilities/transactions/lock/point/point_lock_tracker.cc + utilities/transactions/lock/point/point_lock_manager.cc + utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc + utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc utilities/transactions/optimistic_transaction_db_impl.cc utilities/transactions/optimistic_transaction.cc utilities/transactions/pessimistic_transaction.cc @@ -717,16 +915,54 @@ utilities/transactions/snapshot_checker.cc utilities/transactions/transaction_base.cc utilities/transactions/transaction_db_mutex_impl.cc - utilities/transactions/transaction_lock_mgr.cc utilities/transactions/transaction_util.cc utilities/transactions/write_prepared_txn.cc utilities/transactions/write_prepared_txn_db.cc utilities/transactions/write_unprepared_txn.cc utilities/transactions/write_unprepared_txn_db.cc utilities/ttl/db_ttl_impl.cc + utilities/wal_filter.cc utilities/write_batch_with_index/write_batch_with_index.cc - utilities/write_batch_with_index/write_batch_with_index_internal.cc - $<TARGET_OBJECTS:build_version>) + utilities/write_batch_with_index/write_batch_with_index_internal.cc) + +list(APPEND SOURCES + utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc + utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc + utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc + utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc + utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc + utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc + utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc + utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc + utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc + utilities/transactions/lock/range/range_tree/lib/standalone_port.cc + utilities/transactions/lock/range/range_tree/lib/util/dbt.cc + utilities/transactions/lock/range/range_tree/lib/util/memarena.cc) + +message(STATUS "ROCKSDB_PLUGINS: ${ROCKSDB_PLUGINS}") +if ( ROCKSDB_PLUGINS ) + string(REPLACE " " ";" PLUGINS ${ROCKSDB_PLUGINS}) + foreach (plugin ${PLUGINS}) + add_subdirectory("plugin/${plugin}") + foreach (src ${${plugin}_SOURCES}) + list(APPEND SOURCES plugin/${plugin}/${src}) + set_source_files_properties( + plugin/${plugin}/${src} + PROPERTIES COMPILE_FLAGS "${${plugin}_COMPILE_FLAGS}") + endforeach() + foreach (path ${${plugin}_INCLUDE_PATHS}) + include_directories(${path}) + endforeach() + foreach (lib ${${plugin}_LIBS}) + list(APPEND THIRDPARTY_LIBS ${lib}) + endforeach() + foreach 
(link_path ${${plugin}_LINK_PATHS}) + link_directories(AFTER ${link_path}) + endforeach() + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${${plugin}_CMAKE_SHARED_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${${plugin}_CMAKE_EXE_LINKER_FLAGS}") + endforeach() +endif() if(HAVE_SSE42 AND NOT MSVC) set_source_files_properties( @@ -734,11 +970,11 @@ PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul") endif() -if(HAVE_POWER8) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") list(APPEND SOURCES util/crc32c_ppc.c util/crc32c_ppc_asm.S) -endif(HAVE_POWER8) +endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") if(HAS_ARMV8_CRC) list(APPEND SOURCES @@ -753,7 +989,6 @@ port/win/port_win.cc port/win/win_logger.cc port/win/win_thread.cc) - if(WITH_XPRESS) list(APPEND SOURCES port/win/xpress_win.cc) @@ -799,13 +1034,13 @@ set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT}) endif() -add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES}) -target_link_libraries(${ROCKSDB_STATIC_LIB} +add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES} ${BUILD_VERSION_CC}) +target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) if(ROCKSDB_BUILD_SHARED) - add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES}) - target_link_libraries(${ROCKSDB_SHARED_LIB} + add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES} ${BUILD_VERSION_CC}) + target_link_libraries(${ROCKSDB_SHARED_LIB} PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) if(WIN32) @@ -822,8 +1057,7 @@ LINKER_LANGUAGE CXX VERSION ${rocksdb_VERSION} SOVERSION ${rocksdb_VERSION_MAJOR} - CXX_STANDARD 11 - OUTPUT_NAME "rocksdb") + OUTPUT_NAME "rocksdb${ARTIFACT_SUFFIX}") endif() endif() @@ -834,6 +1068,16 @@ endif() option(WITH_JNI "build with JNI" OFF) +# Tests are excluded from Release builds +CMAKE_DEPENDENT_OPTION(WITH_TESTS "build with tests" ON + "CMAKE_BUILD_TYPE STREQUAL Debug" OFF) +option(WITH_BENCHMARK_TOOLS "build with benchmarks" ON) +option(WITH_CORE_TOOLS "build with ldb and sst_dump" ON) +option(WITH_TOOLS "build with tools" ON) + +if(WITH_TESTS OR WITH_BENCHMARK_TOOLS OR WITH_TOOLS OR WITH_JNI OR JNI) + include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third-party/gtest-1.8.1/fused-src) +endif() if(WITH_JNI OR JNI) message(STATUS "JNI library is enabled") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/java) @@ -871,6 +1115,8 @@ install(DIRECTORY include/rocksdb COMPONENT devel DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + install(DIRECTORY "${PROJECT_SOURCE_DIR}/cmake/modules" COMPONENT devel DESTINATION ${package_config_destination}) + install( TARGETS ${ROCKSDB_STATIC_LIB} EXPORT RocksDBTargets @@ -907,29 +1153,49 @@ ) endif() -# Tests are excluded from Release builds -CMAKE_DEPENDENT_OPTION(WITH_TESTS "build with tests" ON - "CMAKE_BUILD_TYPE STREQUAL Debug" OFF) -if(WITH_TESTS) +option(WITH_ALL_TESTS "Build all test, rather than a small subset" ON) + +if(WITH_TESTS OR WITH_BENCHMARK_TOOLS) add_subdirectory(third-party/gtest-1.8.1/fused-src/gtest) add_library(testharness STATIC + test_util/mock_time_env.cc test_util/testharness.cc) target_link_libraries(testharness gtest) +endif() +if(WITH_TESTS) set(TESTS + db/db_basic_test.cc + env/env_basic_test.cc + ) + if(WITH_ALL_TESTS) + list(APPEND TESTS + cache/cache_reservation_manager_test.cc cache/cache_test.cc cache/lru_cache_test.cc + db/blob/blob_counting_iterator_test.cc + db/blob/blob_file_addition_test.cc + db/blob/blob_file_builder_test.cc + db/blob/blob_file_cache_test.cc + db/blob/blob_file_garbage_test.cc + db/blob/blob_file_reader_test.cc + 
db/blob/blob_garbage_meter_test.cc + db/blob/db_blob_basic_test.cc + db/blob/db_blob_compaction_test.cc + db/blob/db_blob_corruption_test.cc + db/blob/db_blob_index_test.cc db/column_family_test.cc db/compact_files_test.cc + db/compaction/clipping_iterator_test.cc db/compaction/compaction_job_stats_test.cc db/compaction/compaction_job_test.cc db/compaction/compaction_iterator_test.cc db/compaction/compaction_picker_test.cc + db/compaction/compaction_service_test.cc db/comparator_db_test.cc db/corruption_test.cc db/cuckoo_table_db_test.cc - db/db_basic_test.cc - db/db_blob_index_test.cc + db/db_with_timestamp_basic_test.cc db/db_block_cache_test.cc db/db_bloom_filter_test.cc db/db_compaction_filter_test.cc @@ -941,6 +1207,7 @@ db/db_iter_test.cc db/db_iter_stress_test.cc db/db_iterator_test.cc + db/db_kv_checksum_test.cc db/db_log_iter_test.cc db/db_memtable_test.cc db/db_merge_operator_test.cc @@ -948,19 +1215,21 @@ db/db_options_test.cc db/db_properties_test.cc db/db_range_del_test.cc - db/db_impl/db_secondary_test.cc + db/db_secondary_test.cc db/db_sst_test.cc db/db_statistics_test.cc db/db_table_properties_test.cc db/db_tailing_iter_test.cc db/db_test.cc db/db_test2.cc + db/db_logical_block_size_cache_test.cc db/db_universal_compaction_test.cc db/db_wal_test.cc + db/db_with_timestamp_compaction_test.cc db/db_write_test.cc db/dbformat_test.cc db/deletefile_test.cc - db/error_handler_test.cc + db/error_handler_fs_test.cc db/obsolete_files_test.cc db/external_sst_file_basic_test.cc db/external_sst_file_test.cc @@ -976,6 +1245,7 @@ db/merge_test.cc db/options_file_test.cc db/perf_context_test.cc + db/periodic_work_scheduler_test.cc db/plain_table_db_test.cc db/prefix_test.cc db/range_del_aggregator_test.cc @@ -986,17 +1256,21 @@ db/version_edit_test.cc db/version_set_test.cc db/wal_manager_test.cc + db/wal_edit_test.cc db/write_batch_test.cc db/write_callback_test.cc db/write_controller_test.cc - env/env_basic_test.cc env/env_test.cc + env/io_posix_test.cc env/mock_env_test.cc file/delete_scheduler_test.cc + file/prefetch_test.cc + file/random_access_file_reader_test.cc logging/auto_roll_logger_test.cc logging/env_logger_test.cc logging/event_logger_test.cc memory/arena_test.cc + memory/memory_allocator_test.cc memtable/inlineskiplist_test.cc memtable/skiplist_test.cc memtable/write_buffer_manager_test.cc @@ -1004,9 +1278,12 @@ monitoring/iostats_context_test.cc monitoring/statistics_test.cc monitoring/stats_history_test.cc + options/configurable_test.cc + options/customizable_test.cc options/options_settable_test.cc options/options_test.cc table/block_based/block_based_filter_block_test.cc + table/block_based/block_based_table_reader_test.cc table/block_based/block_test.cc table/block_based/data_block_hash_index_test.cc table/block_based/full_filter_block_test.cc @@ -1017,7 +1294,12 @@ table/merger_test.cc table/sst_file_reader_test.cc table/table_test.cc + table/block_fetcher_test.cc + test_util/testutil_test.cc + trace_replay/block_cache_tracer_test.cc + trace_replay/io_tracer_test.cc tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc + tools/io_tracer_parser_test.cc tools/ldb_cmd_test.cc tools/reduce_levels_test.cc tools/sst_dump_test.cc @@ -1035,11 +1317,14 @@ util/random_test.cc util/rate_limiter_test.cc util/repeatable_thread_test.cc + util/ribbon_test.cc util/slice_test.cc util/slice_transform_test.cc util/timer_queue_test.cc + util/timer_test.cc util/thread_list_test.cc util/thread_local_test.cc + util/work_queue_test.cc utilities/backupable/backupable_db_test.cc 
utilities/blob_db/blob_db_test.cc utilities/cassandra/cassandra_functional_test.cc @@ -1059,11 +1344,14 @@ utilities/table_properties_collectors/compact_on_deletion_collector_test.cc utilities/transactions/optimistic_transaction_test.cc utilities/transactions/transaction_test.cc + utilities/transactions/lock/point/point_lock_manager_test.cc utilities/transactions/write_prepared_transaction_test.cc utilities/transactions/write_unprepared_transaction_test.cc + utilities/transactions/lock/range/range_locking_test.cc utilities/ttl/ttl_test.cc utilities/write_batch_with_index/write_batch_with_index_test.cc - ) + ) + endif() if(WITH_LIBRADOS) list(APPEND TESTS utilities/env_librados_test.cc) endif() @@ -1076,7 +1364,6 @@ db/db_test_util.cc monitoring/thread_status_updater_debug.cc table/mock_table.cc - test_util/fault_injection_test_env.cc utilities/cassandra/test_utils.cc ) enable_testing() @@ -1091,21 +1378,25 @@ PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD_RELEASE 1 EXCLUDE_FROM_DEFAULT_BUILD_MINRELEASE 1 EXCLUDE_FROM_DEFAULT_BUILD_RELWITHDEBINFO 1 - ) + ) foreach(sourcefile ${TESTS}) get_filename_component(exename ${sourcefile} NAME_WE) - add_executable(${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX} ${sourcefile}) - set_target_properties(${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX} + add_executable(${exename}${ARTIFACT_SUFFIX} ${sourcefile}) + set_target_properties(${exename}${ARTIFACT_SUFFIX} PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD_RELEASE 1 EXCLUDE_FROM_DEFAULT_BUILD_MINRELEASE 1 EXCLUDE_FROM_DEFAULT_BUILD_RELWITHDEBINFO 1 OUTPUT_NAME ${exename}${ARTIFACT_SUFFIX} - ) - target_link_libraries(${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX} testutillib${ARTIFACT_SUFFIX} testharness gtest ${ROCKSDB_LIB}) + ) + target_link_libraries(${exename}${ARTIFACT_SUFFIX} testutillib${ARTIFACT_SUFFIX} testharness gtest ${THIRDPARTY_LIBS} ${ROCKSDB_LIB}) if(NOT "${exename}" MATCHES "db_sanity_test") - add_test(NAME ${exename} COMMAND ${exename}${ARTIFACT_SUFFIX}) - add_dependencies(check ${CMAKE_PROJECT_NAME}_${exename}${ARTIFACT_SUFFIX}) + gtest_discover_tests(${exename} DISCOVERY_TIMEOUT 120) + add_dependencies(check ${exename}${ARTIFACT_SUFFIX}) + endif() + if("${exename}" MATCHES "env_librados_test") + # env_librados_test.cc uses librados directly + target_link_libraries(${exename}${ARTIFACT_SUFFIX} rados) endif() endforeach(sourcefile ${TESTS}) @@ -1122,57 +1413,71 @@ if(ROCKSDB_LIB_FOR_C) set(C_TESTS db/c_test.c) - # C executables must link to a shared object add_executable(c_test db/c_test.c) - target_link_libraries(c_test ${ROCKSDB_SHARED_LIB} testharness) + target_link_libraries(c_test ${ROCKSDB_LIB_FOR_C} testharness) add_test(NAME c_test COMMAND c_test${ARTIFACT_SUFFIX}) add_dependencies(check c_test) endif() endif() -option(WITH_BENCHMARK_TOOLS "build with benchmarks" ON) if(WITH_BENCHMARK_TOOLS) - add_executable(db_bench + add_executable(db_bench${ARTIFACT_SUFFIX} + tools/simulated_hybrid_file_system.cc tools/db_bench.cc tools/db_bench_tool.cc) - target_link_libraries(db_bench - ${ROCKSDB_LIB}) + target_link_libraries(db_bench${ARTIFACT_SUFFIX} + ${ROCKSDB_LIB} ${THIRDPARTY_LIBS}) - add_executable(cache_bench - cache/cache_bench.cc) - target_link_libraries(cache_bench - ${ROCKSDB_LIB}) + add_executable(cache_bench${ARTIFACT_SUFFIX} + cache/cache_bench.cc + cache/cache_bench_tool.cc) + target_link_libraries(cache_bench${ARTIFACT_SUFFIX} + ${ROCKSDB_LIB} ${GFLAGS_LIB}) - add_executable(memtablerep_bench + add_executable(memtablerep_bench${ARTIFACT_SUFFIX} memtable/memtablerep_bench.cc) - 
target_link_libraries(memtablerep_bench - ${ROCKSDB_LIB}) + target_link_libraries(memtablerep_bench${ARTIFACT_SUFFIX} + ${ROCKSDB_LIB} ${GFLAGS_LIB}) - add_executable(range_del_aggregator_bench + add_executable(range_del_aggregator_bench${ARTIFACT_SUFFIX} db/range_del_aggregator_bench.cc) - target_link_libraries(range_del_aggregator_bench - ${ROCKSDB_LIB}) + target_link_libraries(range_del_aggregator_bench${ARTIFACT_SUFFIX} + ${ROCKSDB_LIB} ${GFLAGS_LIB}) - add_executable(table_reader_bench + add_executable(table_reader_bench${ARTIFACT_SUFFIX} table/table_reader_bench.cc) - target_link_libraries(table_reader_bench - ${ROCKSDB_LIB} testharness) + target_link_libraries(table_reader_bench${ARTIFACT_SUFFIX} + ${ROCKSDB_LIB} testharness ${GFLAGS_LIB}) - add_executable(filter_bench + add_executable(filter_bench${ARTIFACT_SUFFIX} util/filter_bench.cc) - target_link_libraries(filter_bench - ${ROCKSDB_LIB}) + target_link_libraries(filter_bench${ARTIFACT_SUFFIX} + ${ROCKSDB_LIB} ${GFLAGS_LIB}) - add_executable(hash_table_bench + add_executable(hash_table_bench${ARTIFACT_SUFFIX} utilities/persistent_cache/hash_table_bench.cc) - target_link_libraries(hash_table_bench - ${ROCKSDB_LIB}) + target_link_libraries(hash_table_bench${ARTIFACT_SUFFIX} + ${ROCKSDB_LIB} ${GFLAGS_LIB}) endif() -option(WITH_TOOLS "build with tools" ON) -if(WITH_TOOLS) +if(WITH_CORE_TOOLS OR WITH_TOOLS) add_subdirectory(tools) + add_custom_target(core_tools + DEPENDS ${core_tool_deps}) +endif() + +if(WITH_TOOLS) add_subdirectory(db_stress_tool) add_custom_target(tools DEPENDS ${tool_deps}) endif() + +option(WITH_EXAMPLES "build with examples" OFF) +if(WITH_EXAMPLES) + add_subdirectory(examples) +endif() + +option(WITH_BENCHMARK "build benchmark tests" OFF) +if(WITH_BENCHMARK) + add_subdirectory(${PROJECT_SOURCE_DIR}/microbench/) +endif() diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/DEFAULT_OPTIONS_HISTORY.md mariadb-10.11.13/storage/rocksdb/rocksdb/DEFAULT_OPTIONS_HISTORY.md --- mariadb-10.11.11/storage/rocksdb/rocksdb/DEFAULT_OPTIONS_HISTORY.md 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/DEFAULT_OPTIONS_HISTORY.md 2025-05-19 16:14:27.000000000 +0000 @@ -1,4 +1,4 @@ -# RocksDB default options change log +# RocksDB default options change log (NO LONGER MAINTAINED) ## Unreleased * delayed_write_rate takes the rate given by rate_limiter if not specified. diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/HISTORY.md mariadb-10.11.13/storage/rocksdb/rocksdb/HISTORY.md --- mariadb-10.11.11/storage/rocksdb/rocksdb/HISTORY.md 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/HISTORY.md 2025-05-19 16:14:27.000000000 +0000 @@ -1,9 +1,707 @@ # Rocksdb Change Log -## Unreleased +## 6.29.5 (03/29/2022) ### Bug Fixes +* Fixed a race condition for `alive_log_files_` in non-two-write-queues mode. The race is between the write_thread_ in WriteToWAL() and another thread executing `FindObsoleteFiles()`. The race condition will be caught if `__glibcxx_requires_nonempty` is enabled. +* Fixed a race condition when mmaping a WritableFile on POSIX. +* Fixed a race condition when 2PC is disabled and WAL tracking in the MANIFEST is enabled. The race condition is between two background flush threads trying to install flush results, causing a WAL deletion not tracked in the MANIFEST. A future DB open may fail. +* Fixed a heap use-after-free race with DropColumnFamily. +* Fixed a bug that `rocksdb.read.block.compaction.micros` cannot track compaction stats (#9722). 
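The last 6.29.5 bug fix above concerns the `rocksdb.read.block.compaction.micros` histogram. For reference, a minimal sketch of how that statistic surfaces through RocksDB's public Statistics API (the database path is hypothetical; the upstream fix simply makes this histogram populate once compactions run):

```cpp
#include <cassert>
#include <iostream>
#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/statistics.h>

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();  // enable tickers/histograms

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/rocksdb_stats_demo", &db);
  assert(s.ok());

  // ... write enough data for compactions to run ...

  // With the 6.29.5 fix, this histogram tracks block reads done by compaction.
  rocksdb::HistogramData hist;
  options.statistics->histogramData(rocksdb::READ_BLOCK_COMPACTION_MICROS, &hist);
  std::cout << "compaction block read p99: " << hist.percentile99 << " us\n";

  delete db;
  return 0;
}
```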
+ +## 6.29.4 (03/22/2022) +### Bug Fixes +* Fixed a bug caused by a race among flushes, incoming writes and snapshot creation. Queries against snapshots created under this race condition could return incorrect results, e.g. resurfacing deleted data. +* Fixed a bug where DisableManualCompaction could assert when disabling an unscheduled manual compaction. +* Fixed a bug where `Iterator::Refresh()` read stale keys after DeleteRange() was performed. +* Fixed a race condition when disabling and re-enabling manual compaction. +* Fixed a race condition when cancelling a manual compaction with `DisableManualCompaction`. DB close can also cancel the manual compaction thread. +* Fixed a data race on `versions_` between `DBImpl::ResumeImpl()` and threads waiting for recovery to complete (#9496). +* Fixed a read-after-free bug in `DB::GetMergeOperands()`. +* Fixed NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, NUM_DATA_BLOCKS_READ_PER_LEVEL, and NUM_SST_READ_PER_LEVEL stats to be reported once per MultiGet batch per level. + +## 6.29.3 (02/17/2022) +### Bug Fixes +* Fix a data loss bug for 2PC write-committed transactions caused by concurrent transaction commit and memtable switch (#9571). + +## 6.29.2 (02/15/2022) +### Performance Improvements +* DisableManualCompaction() no longer has to wait for a scheduled manual compaction to be executed in the thread pool in order to cancel the job. + +## 6.29.1 (01/31/2022) +### Bug Fixes +* Fixed a major bug in which batched MultiGet could return old values for keys deleted by DeleteRange when the memtable Bloom filter is enabled (memtable_prefix_bloom_size_ratio > 0). (The fix includes a substantial MultiGet performance improvement in the unusual case of both memtable_whole_key_filtering and prefix_extractor.) + +## 6.29.0 (01/21/2022) +Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info. +### Public API change +* Added values to `TraceFilterType`: `kTraceFilterIteratorSeek`, `kTraceFilterIteratorSeekForPrev`, and `kTraceFilterMultiGet`. They can be set in `TraceOptions` to filter out the operation types after which they are named. +* Added `TraceOptions::preserve_write_order`. When enabled it guarantees that write records are traced in the same order they are logged to WAL and applied to the DB. By default it is disabled (false) to match the legacy behavior and prevent regression. +* Made the Env class extend the Customizable class. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. +* `Options::OldDefaults` is marked deprecated, as it is no longer maintained. +* Add ObjectLibrary::AddFactory and ObjectLibrary::PatternEntry classes. This method and associated class are the preferred mechanism for registering factories with the ObjectLibrary going forward. The ObjectLibrary::Register method, which uses regular expressions and may be problematic, is deprecated and will be removed in a future release. +* Changed `BlockBasedTableOptions::block_size` from `size_t` to `uint64_t`. +* Added an API warning against using `Iterator::Refresh()` together with `DB::DeleteRange()`, which are incompatible and have always risked causing the refreshed iterator to return incorrect results. + +### Behavior Changes +* `DB::DestroyColumnFamilyHandle()` will return Status::InvalidArgument() if called with `DB::DefaultColumnFamily()`. +* On 32-bit platforms, mmap reads are no longer quietly disabled, just discouraged. 
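A sketch of the first behavior change listed just above, i.e. what 6.29.0 callers should now expect when handing the default column family to `DB::DestroyColumnFamilyHandle()` (error handling shortened for brevity):

```cpp
#include <cassert>
#include <rocksdb/db.h>

// db points at an open rocksdb::DB instance.
void destroy_default_handle(rocksdb::DB* db) {
  rocksdb::Status s =
      db->DestroyColumnFamilyHandle(db->DefaultColumnFamily());
  // Per the 6.29.0 behavior change, the default handle is owned by the DB
  // and must not be destroyed by the caller.
  assert(s.IsInvalidArgument());
}
```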
+ +### New Features +* Added `Options::DisableExtraChecks()` that can be used to improve peak write performance by disabling checks that should not be necessary in the absence of software logic errors or CPU+memory hardware errors. (Default options are slowly moving toward some performance overheads for extra correctness checking.) + +### Performance Improvements +* Improved read performance when a prefix extractor is used (Seek, Get, MultiGet), even compared to the version 6.25 baseline (see bug fix below), by optimizing the common case of a prefix extractor that is compatible with the table file and unchanging. + +### Bug Fixes +* Fixed a bug where FlushMemTable could return OK even though the flush did not succeed. +* Fixed a bug of Sync() and Fsync() not using `fcntl(F_FULLFSYNC)` on OS X and iOS. +* Fixed a significant performance regression in version 6.26 when a prefix extractor is used on the read path (Seek, Get, MultiGet). (Excessive time was spent in SliceTransform::AsString().) + +### New Features +* Added RocksJava support for MacOS universal binary (ARM+x86). + +## 6.28.0 (2021-12-17) +### New Features +* Introduced 'CommitWithTimestamp' as a new tag. Currently, there is no API for users to trigger a write with this tag to the WAL. This is part of the efforts to support write-committed transactions with user-defined timestamps. +* Introduce SimulatedHybridFileSystem which can help simulate HDD latency in db_bench. Tiered Storage latency simulation can be enabled using -simulate_hybrid_fs_file (note that it doesn't work if db_bench is interrupted in the middle). -simulate_hdd can also be used to simulate all files on HDD. + +### Bug Fixes +* Fixed a bug in RocksDB's automatic implicit prefetching, which was broken by the new adaptive_readahead feature: internal prefetching was disabled when an iterator moved from one file to the next. +* Fixed a bug in TableOptions.prepopulate_block_cache which caused a segmentation fault when used with TableOptions.partition_filters = true and TableOptions.cache_index_and_filter_blocks = true. +* Fixed a bug affecting custom memtable factories which are not registered with the `ObjectRegistry`. The bug could result in failure to save the OPTIONS file. +* Fixed a bug causing two duplicate entries to be appended to a file opened in non-direct mode and tracked by `FaultInjectionTestFS`. +* Fixed TableOptions.prepopulate_block_cache so that it also supports block-based filters. +* Block cache keys no longer use `FSRandomAccessFile::GetUniqueId()` (previously used when available), so a filesystem recycling unique ids can no longer lead to incorrect results or crashes (#7405). For files generated by RocksDB >= 6.24, the cache keys are stable across DB::Open and DB directory move / copy / import / export / migration, etc. Although collisions are still theoretically possible, they are (a) impossible in many common cases, (b) not dependent on environmental factors, and (c) much less likely than a CPU miscalculation while executing RocksDB. +* Fixed a bug in the C bindings causing iterators to return incorrect results (#9343). + +### Behavior Changes +* MemTableList::TrimHistory now uses allocated bytes when max_write_buffer_size_to_maintain > 0 (default in TransactionDB, introduced in PR #5022). Fixes #8371. + +### Public API change +* Extended the WriteBatch::AssignTimestamp and AssignTimestamps APIs so that both functions can accept an optional `checker` argument that performs additional checking on timestamp sizes. 
+* Introduced a new EventListener callback that will be called upon the end of automatic error recovery.
+* Added an IncreaseFullHistoryTsLow API so users can advance each column family's full_history_ts_low separately.
+* Added a GetFullHistoryTsLow API so users can query the current full_history_ts_low value of a specified column family.
+
+### Performance Improvements
+* Replaced the map property `TableProperties::properties_offsets` with the uint64_t property `external_sst_file_global_seqno_offset` to save memory in table properties.
+* Block cache accesses are faster because RocksDB now uses cache keys of fixed size (16 bytes).
+
+### Java API Changes
+* Removed the Java API `TableProperties.getPropertiesOffsets()` as it exposed internal details to external users.
+
+## 6.27.0 (2021-11-19)
+### New Features
+* Added a new ChecksumType kXXH3 which is faster than kCRC32c on almost all x86\_64 hardware.
+* Added a new online consistency check for BlobDB which validates that the number/total size of garbage blobs does not exceed the number/total size of all blobs in any given blob file.
+* Provided support for tracking per-SST user-defined timestamp information in the MANIFEST.
+* Added a new option "adaptive_readahead" in ReadOptions. For iterators, RocksDB does auto-readahead on noticing sequential reads; by enabling this option, the readahead_size of the current file (if reads are sequential) will be carried forward to the next file instead of starting from scratch at each level (except L0 level files). If reads are not sequential, it will fall back to 8KB. This option is applicable only to RocksDB's internal prefetch buffer and isn't supported with underlying file system prefetching.
+* Added read count and read bytes related stats to Statistics for tiered storage hot, warm, and cold file reads.
+* Added an option to dynamically charge an updating estimate of the memory usage of block-based table building to the block cache, if a block cache is available. It currently only includes charging the memory usage of constructing (new) Bloom filters and Ribbon filters to the block cache. To enable this feature, set `BlockBasedTableOptions::reserve_table_builder_memory = true`.
+* Added a new API, OnIOError, in listener.h that notifies listeners when an IO error occurs during a FileSystem operation, along with the filename, status, etc.
+* Added compaction readahead support for blob files to the integrated BlobDB implementation, which can improve compaction performance when the database resides on higher-latency storage like HDDs or remote filesystems. Readahead can be configured using the column family option `blob_compaction_readahead_size`.
+
+### Bug Fixes
+* Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption.
+* Fixed a bug in CompactionIterator when write-prepared transactions are used. A released earliest write conflict snapshot could cause an assertion failure in dbg mode and an unexpected key in opt mode.
+* Fixed the ticker WRITE_WITH_WAL ("rocksdb.write.wal"). The bug was caused by a bad extra `RecordTick(stats_, WRITE_WITH_WAL)` in two places; this fix removes the extra `RecordTick`s and fixes the corresponding test case.
+* EventListener::OnTableFileCreated was previously called with OK status and file_size==0 in cases where no SST file contents were written (because there was no content to add) and the empty file was deleted before calling the listener. Now the status is Aborted.
+* Fixed a bug in CompactionIterator when write-prepared transactions are used. Releasing earliest_snapshot during compaction could cause a SingleDelete to be output after a PUT of the same user key whose seq has been zeroed.
+* Added input sanitization on negative bytes passed into `GenericRateLimiter::Request`.
+* Fixed an assertion failure in CompactionIterator when write-prepared transactions are used. We prove that certain operations can lead to a Delete being followed by a SingleDelete (same user key); we can drop the SingleDelete.
+* Fixed a bug in timestamp-based GC which could cause all versions of a key under full_history_ts_low to be dropped. This bug is triggered when some of the ikeys' timestamps are lower than full_history_ts_low, while others are newer.
+* In some cases outside of the DB read and compaction paths, SST block checksums are now checked where they were not before.
+* Explicitly check for and disallow `BlockBasedTableOptions` if an insertion into one of {`block_cache`, `block_cache_compressed`, `persistent_cache`} can show up in another of these. (RocksDB expects to be able to use the same key for different physical data among tiers.)
+* Users who configured a dedicated thread pool for bottommost compactions by explicitly adding threads to the `Env::Priority::BOTTOM` pool will no longer see RocksDB schedule automatic compactions exceeding the DB's compaction concurrency limit. For details on the per-DB compaction concurrency limit, see the API docs of `max_background_compactions` and `max_background_jobs`.
+* Fixed a bug of the background flush thread picking more memtables to flush and prematurely advancing the column family's log_number.
+* Fixed an assertion failure in ManifestTailer.
+* Fixed a bug that could, with WAL enabled, cause backups, checkpoints, and `GetSortedWalFiles()` to fail randomly with an error like `IO error: 001234.log: No such file or directory`.
+
+### Behavior Changes
+* `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level's files; now it includes all input files.
+* `TransactionUtil::CheckKeyForConflicts` can also perform conflict-checking based on user-defined timestamps in addition to sequence numbers.
+* Removed `GenericRateLimiter`'s previously enforced minimum refill bytes per period.
+
+### Public API change
+* When options.ttl is used with leveled compaction with compaction priority kMinOverlappingRatio, files exceeding half of the TTL value will be prioritized more, so that by the time the TTL is reached, fewer extra compactions will be scheduled to clear them up. At the same time, when compacting files with data older than half of the TTL, output files may be cut off based on those files' boundaries, in order for the early TTL compaction to work properly.
+* Made FileSystem and RateLimiter extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
+* Clarified in API comments that RocksDB is not exception safe for callbacks and custom extensions. An exception propagating into RocksDB can lead to undefined behavior, including data loss, unreported corruption, deadlocks, and more.
+* Marked `WriteBufferManager` as `final` because it is not intended for extension.
+* Removed unimportant implementation details from table_properties.h.
+* Added the API `FSDirectory::FsyncWithDirOptions()`, which provides extra information, such as the directory fsync reason, in `DirFsyncOptions`. File systems like btrfs use this to skip the directory fsync when creating a new file, or to fsync the target file instead of the directory when renaming a file, which improves `DB::Open()` speed by ~20%.
+* `DB::Open()` is no longer blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
+* In builds where glibc provides `gettid()`, info log ("LOG" file) lines now print a system-wide thread ID from `gettid()` instead of the process-local `pthread_self()`. For all users, the thread ID format has changed from hexadecimal to decimal integer.
+* In builds where glibc provides `pthread_setname_np()`, the background thread names no longer contain an ID suffix. For example, "rocksdb:bottom7" (and all other threads in the `Env::Priority::BOTTOM` pool) are now named "rocksdb:bottom". Previously, large thread pools could breach the name size limit (e.g., naming "rocksdb:bottom10" would fail).
+* Deprecated `ReadOptions::iter_start_seqnum` and `DBOptions::preserve_deletes`; please try the user-defined timestamp feature instead. These options will be removed in a future release; currently, a warning message is logged when they are used.
+
+### Performance Improvements
+* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for the `FullFilter` and `PartitionedFilter` cases (#9070).
+
+## 6.26.0 (2021-10-20)
+### Bug Fixes
+* Fixed a bug in direct IO mode when calling MultiGet() for blobs in the same blob file. The bug was caused by not sorting the blob read requests by file offset.
+* Fixed the incorrect disabling of SST rate-limited deletion when the WAL and DB are in different directories. Only WAL rate-limited deletion should be disabled, and only if the WAL is in a different directory.
+* Fixed `DisableManualCompaction()` to cancel compactions even when they are waiting on automatic compactions to drain due to `CompactRangeOptions::exclusive_manual_compactions == true`.
+* Fixed the contract of `Env::ReopenWritableFile()` and `FileSystem::ReopenWritableFile()` to specify that any existing file must not be deleted or truncated.
+* Fixed a bug in calls to `IngestExternalFiles()` with files for multiple column families. The bug could have introduced a delay in ingested file keys becoming visible after `IngestExternalFiles()` returned. Furthermore, mutations to ingested file keys while they were invisible could have been dropped (not necessarily immediately).
+* Fixed a possible race condition impacting users of `WriteBufferManager` who constructed it with `allow_stall == true`. The race condition led to undefined behavior (in our experience, typically a process crash).
+* Fixed a bug where stalled writes would remain stalled forever after the user calls `WriteBufferManager::SetBufferSize()` with `new_size == 0` to dynamically disable memory limiting.
+* Made `DB::close()` thread-safe.
+* Fixed a bug in atomic flush where one bg flush thread would wait forever for a preceding bg flush thread to commit its result to the MANIFEST when that thread encountered an error which is mapped to a soft error (DB not stopped).
+* Fixed a bug in `BackupEngine` where some internal callers of `GenericRateLimiter::Request()` did not honor `bytes <= GetSingleBurstBytes()`.
+
+### New Features
+* Print information about blob files when using "ldb list_live_files_metadata".
+* Provided support for SingleDelete with user-defined timestamps.
+* The experimental new function DB::GetLiveFilesStorageInfo offers essentially a unified version of other functions like GetLiveFiles, GetLiveFilesChecksumInfo, and GetSortedWalFiles. Checkpoints and backups could show small behavioral changes and/or improved performance as they now use this new API.
+* Added remote compaction read/write bytes statistics: `REMOTE_COMPACT_READ_BYTES`, `REMOTE_COMPACT_WRITE_BYTES`.
+* Introduced an experimental feature to dump out the blocks from the block cache and insert them into the secondary cache to reduce the cache warmup time (e.g., used while migrating a DB instance). More information is in `class CacheDumper` and `CacheDumpedLoader` in `rocksdb/utilities/cache_dump_load.h`. Note that this feature is still experimental and subject to change in the future.
+* Introduced a new BlobDB configuration option `blob_garbage_collection_force_threshold`, which can be used to trigger compactions targeting the SST files which reference the oldest blob files when the ratio of garbage in those blob files meets or exceeds the specified threshold. This can reduce space amplification with skewed workloads where the affected SST files might not otherwise get picked up for compaction.
+* Added EXPERIMENTAL support for table file (SST) unique identifiers that are stable and universally unique, available with the new function `GetUniqueIdFromTableProperties`. Only SST files from RocksDB >= 6.24 support unique IDs.
+* Added `GetMapProperty()` support for "rocksdb.dbstats" (`DB::Properties::kDBStats`). As a map property, it includes DB-level internal stats accumulated over the DB's lifetime, such as user write related stats and uptime.
+
+### Public API change
+* Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
+* Made SliceTransform extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. The Capped and Prefixed transform classes return a short name (no length); use GetId for the fully qualified name.
+* Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method.
+* Some fields of SstFileMetaData are deprecated for compatibility with the new base class FileStorageInfo.
+* Added `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of this batch of files.
+* If `DB::Close()` failed with a non-aborted status, calling `DB::Close()` again will return the original status instead of Status::OK().
+* Added CacheTier to advanced_options.h to describe the cache tier used. Added a `lowest_used_cache_tier` option to `DBOptions` (immutable) and pass it to BlockBasedTableReader. By default it is `CacheTier::kNonVolatileBlockTier`, which means we always use both the block cache (kVolatileTier) and the secondary cache (kNonVolatileBlockTier). By setting it to `CacheTier::kVolatileTier`, the DB will not use the secondary cache.
+* Even when options.max_compaction_bytes is hit, compaction output files are only cut when they align with grandparent files' boundaries. options.max_compaction_bytes could be slightly violated with this change, but the violation is no more than one target SST file size, which is usually much smaller.
+
+### Performance Improvements
+* Improved the CPU efficiency of building block-based table (SST) files (#9039 and #9040).
+
+### Java API Changes
+* Added Java API bindings for the new integrated BlobDB options.
+* `keyMayExist()` supports ByteBuffer.
+* Fixed multiGet() throwing a Null Pointer Exception for numbers of keys > 70k (https://github.com/facebook/rocksdb/issues/8039).
+
+## 6.25.0 (2021-09-20)
+### Bug Fixes
+* Allow a secondary instance to refresh iterators. Assign the read seq after referencing the SuperVersion.
+* Fixed a bug of the secondary instance's last_sequence going backward, causing reads on the secondary to fail to see recent updates from the primary.
+* Fixed a bug that could lead to duplicate DB ID or DB session ID in POSIX environments without /proc/sys/kernel/random/uuid.
+* Fixed a race in DumpStats() with column family destruction due to not taking a Ref on each entry while iterating the ColumnFamilySet.
+* Fixed a race in item ref counting in LRUCache when promoting an item from the SecondaryCache.
+* Fixed a race in BackupEngine if RateLimiter is reconfigured during concurrent Restore operations.
+* Fixed a bug on POSIX in which failure to create a lock file (e.g. out of space) could prevent future LockFile attempts in the same process on the same file from succeeding.
+* Fixed a bug where backup_rate_limiter and restore_rate_limiter in BackupEngine could not limit read rates.
+* Fixed the implementation of `prepopulate_block_cache = kFlushOnly` to only apply to flushes rather than to all generated files.
+* Fixed WAL data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together. The WAL sync now works with log_write_mutex_ locked.
+* Added checks for the validity of the IO uring completion queue entries, and fail the BlockBasedTableReader MultiGet sub-batch if there is an invalid completion.
+* Added an interface RocksDbIOUringEnable() that, if defined by the user, allows them to enable/disable the use of IO uring by RocksDB.
+* Fixed a bug where, when direct I/O is used and MultiRead() returns a short result, RandomAccessFileReader::MultiRead() still returned a full-size buffer, with the returned short value together with some data from the original buffer. This bug is unlikely to cause incorrect results, because (1) since the FileSystem layer is expected to retry on short results, returning a short result is only possible when asking for more bytes at the end of the file, which RocksDB doesn't do when using MultiRead(); and (2) the checksum is unlikely to match.
+
+### New Features
+* RemoteCompaction's interface now includes `db_name`, `db_id`, and `session_id`, which can help the user uniquely identify a compaction job across DB instances and sessions.
+* Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries.
+* Added ticker statistics, "rocksdb.backup.read.bytes" and "rocksdb.backup.write.bytes", reporting how many bytes were read and written during backup.
+* Added properties for BlobDB: `rocksdb.num-blob-files`, `rocksdb.blob-stats`, `rocksdb.total-blob-file-size`, and `rocksdb.live-blob-file-size`. The existing property `rocksdb.estimate-live-data-size` was also extended to include live bytes residing in blob files.
+* Added two new RateLimiter IOPriorities: `Env::IO_USER` and `Env::IO_MID`. `Env::IO_USER` has superior priority over all other RateLimiter IOPriorities, without being subject to the fair scheduling constraint.
+* `SstFileWriter` now supports `Put`s and `Delete`s with user-defined timestamps. Note that the ingestion logic itself is not timestamp-aware yet.
+* Allow a single write batch to include keys from multiple column families whose timestamp formats can differ. For example, some column families may disable timestamps, while others enable them.
+* Added compaction priority information in RemoteCompaction, which can be used to schedule high-priority jobs first.
+* Added new callback APIs `OnBlobFileCreationStarted`, `OnBlobFileCreated`, and `OnBlobFileDeleted` in the `EventListener` class of listener.h. They notify listeners during the creation/deletion of individual blob files in the integrated BlobDB, and also log blob file creation finished events and deletion events in the LOG file.
+* Batch blob read requests for `DB::MultiGet` using `MultiRead`.
+* Added support for fallback to local compaction: the user can return `CompactionServiceJobStatus::kUseLocal` to instruct RocksDB to run the compaction locally instead of waiting for the remote compaction result.
+* Added the built-in rate limiter's implementation of `RateLimiter::GetTotalPendingRequests(int64_t* total_pending_requests, const Env::IOPriority pri)` for the total number of requests that are pending for bytes in the rate limiter (see the sketch after this section).
+* Charge memory usage during data buffering, from which training samples are gathered for dictionary compression, to the block cache. Unbuffering data can now be triggered if the block cache becomes full and `strict_capacity_limit=true` for the block cache, in addition to the existing conditions that can trigger unbuffering.
+
+### Public API change
+* Removed the obsolete implementation details FullKey and ParseFullKey from the public API.
+* Changed `SstFileMetaData::size` from `size_t` to `uint64_t`.
+* Made Statistics extend the Customizable class and added a CreateFromString method. Implementations of Statistics need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
+* Extended `FlushJobInfo` and `CompactionJobInfo` in listener.h to provide information about the blob files generated by a flush/compaction and garbage collected during compaction in the integrated BlobDB. Added struct members `blob_file_addition_infos` and `blob_file_garbage_infos` that contain this information.
+* Extended the parameter `output_file_names` of the `CompactFiles` API to also include the paths of the blob files generated by the compaction in the integrated BlobDB.
+* Most `BackupEngine` functions now return `IOStatus` instead of `Status`. Most existing code should be compatible with this change, but some calls might need to be updated.
+* Added a new field `level_at_creation` in `TablePropertiesCollectorFactory::Context` to capture the level at which the SST file (i.e., table) is created, of which the properties are being collected.
+
+### Miscellaneous
+* Added a paranoid check so that in case the FileSystem layer doesn't fill the buffer but returns success, the checksum is unlikely to match even if the buffer contains a previous block. The modified byte is not useful anyway, so this isn't expected to change any behavior when the FileSystem is satisfying its contract.
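+
+A hedged sketch of the 6.25.0 rate limiter additions above — a minimal example, where the rate, byte count, and priority values are illustrative assumptions, not taken from the changelog:
+
+```cpp
+#include <cstdint>
+
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/rate_limiter.h"
+
+using namespace rocksdb;
+
+int main() {
+  Options options;
+  // Shared rate limiter: 10 MB/s for flush/compaction I/O (illustrative).
+  options.rate_limiter.reset(NewGenericRateLimiter(10 * 1024 * 1024));
+
+  // Env::IO_USER (new in 6.25.0) outranks all other priorities and is not
+  // subject to fair scheduling; Env::IO_MID sits between IO_LOW and IO_HIGH.
+  options.rate_limiter->Request(4096 /* bytes */, Env::IO_USER,
+                                nullptr /* stats */,
+                                RateLimiter::OpType::kWrite);
+
+  // Introspection added in 6.25.0: requests currently pending for bytes.
+  int64_t pending = 0;
+  options.rate_limiter->GetTotalPendingRequests(&pending, Env::IO_TOTAL);
+  return 0;
+}
+```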
+
+## 6.24.0 (2021-08-20)
+### Bug Fixes
+* If the primary's CURRENT file is missing or inaccessible, the secondary instance should not hang repeatedly trying to switch to a new MANIFEST. It should instead return the error code encountered while accessing the file.
+* Restoring backups with BackupEngine is now a logically atomic operation, so that if a restore operation is interrupted, DB::Open on it will fail. Using BackupEngineOptions::sync (default) ensures atomicity even in case of power loss or OS crash.
+* Fixed a race related to the destruction of `ColumnFamilyData` objects. The earlier logic unlocked the DB mutex before destroying the thread-local `SuperVersion` pointers, which could result in a process crash if another thread managed to get a reference to the `ColumnFamilyData` object.
+* Removed a call to `RenameFile()` on a non-existent info log file ("LOG") when opening a new DB. Such a call was guaranteed to fail, though it did not impact applications since we swallowed the error. We have now also stopped swallowing errors when renaming the "LOG" file.
+* Fixed an issue where `OnFlushCompleted` was not called for atomic flush.
+* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`.
+* Fixed a potential incorrect result in opt mode and assertion failures caused by releasing snapshot(s) during compaction.
+* Fixed the passing of BlobFileCompletionCallback to the compaction job and atomic flush job, which previously received the default parameter (nullptr). BlobFileCompletionCallback is an internal callback that manages the addition of blob files to SstFileManager.
+* Fixed MultiGet not updating the block_read_count and block_read_byte PerfContext counters.
+
+### New Features
+* Made the EventListener extend the Customizable class.
+* EventListeners that have a non-empty Name() and that are registered with the ObjectRegistry can now be serialized to/from the OPTIONS file.
+* Insert warm blocks (data blocks, uncompressed dict blocks, index and filter blocks) in the block cache during flush under the option BlockBasedTableOptions.prepopulate_block_cache. Previously this was enabled only for data blocks.
+* BlockBasedTableOptions.prepopulate_block_cache can be dynamically configured using DB::SetOptions.
+* Added CompactionOptionsFIFO.age_for_warm, which allows RocksDB to move old files to the warm tier in FIFO compactions. Note that file temperature is still an experimental feature.
+* Added a comment suggesting that btrfs users disable file preallocation by setting `options.allow_fallocate=false`.
+* The fast forward option in Trace replay has changed to double type to allow replaying at a lower speed, by setting the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench.
+* Added the property `LiveSstFilesSizeAtTemperature` to retrieve SST file sizes at different temperatures.
+* Added a stat rocksdb.secondary.cache.hits.
+* Added a PerfContext counter secondary_cache_hit_count.
+* The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`.
+* Added a hybrid configuration of Ribbon filter and Bloom filter where some LSM levels use Ribbon for memory space efficiency and some use Bloom for speed. See NewRibbonFilterPolicy. This also changes the default behavior of NewRibbonFilterPolicy to use Bloom for flushes under leveled and universal compaction and Ribbon otherwise. The C API function `rocksdb_filterpolicy_create_ribbon` is unchanged, but the new `rocksdb_filterpolicy_create_ribbon_hybrid` has been added.
+
+### Public API change
+* Added APIs to decode and replay trace files via the Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h, trace_record_result.h and utilities/replayer.h files to access the decoded trace records, replay them, and query the actual operation results.
+* Added Configurable::GetOptionsMap to the public API for use in creating new Customizable classes.
+* Generalized bits_per_key parameters in the C API from int to double for greater configurability. Although this is a compatible change for existing C source code, anything depending on C API signatures, such as foreign function interfaces, will need to be updated.
+
+### Performance Improvements
+* Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value.
+
+### Behavior Changes
+* `StringAppendOperator` additionally accepts a string as the delimiter.
+* BackupEngineOptions::sync (default true) now applies to restoring backups in addition to creating backups. This could slow down restores, but ensures they are fully persisted before returning OK. (Consider increasing max_background_operations to improve performance.)
+
+## 6.23.0 (2021-07-16)
+### Behavior Changes
+* Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present.
+### Bug Fixes
+* Blob file checksums are now printed in hexadecimal format when using the `manifest_dump` `ldb` command.
+* `GetLiveFilesMetaData()` now populates the `temperature`, `oldest_ancester_time`, and `file_creation_time` fields of its `LiveFileMetaData` results when the information is available. Previously these fields always contained zero, indicating unknown.
+* Fixed mismatches of OnCompaction{Begin,Completed} in case of DisableManualCompaction().
+* Fixed continuous logging of an existing background error on every user write.
+* Fixed a bug where `Get()` returns Status::OK() and an empty value for a non-existent key when `read_options.read_tier = kBlockCacheTier`.
+* Fixed a bug where stats in `get_context` didn't accumulate to statistics when a query failed.
+* Fixed handling of DBOptions::wal_dir with LoadLatestOptions() or ldb --try_load_options on a copied or moved DB. Previously, when the WAL directory is the same as the DB directory (the default), a copied or moved DB would reference the old path of the DB as the WAL directory, potentially corrupting both copies. Under this change, the wal_dir from DB::GetOptions() or LoadLatestOptions() may now be empty, indicating that the current DB directory is used for WALs. This is also a subtle API change.
+
+### New Features
+* ldb has a new feature, `list_live_files_metadata`, that shows the live SST files, as well as their LSM storage level and the column family they belong to.
+* The new BlobDB implementation now tracks the amount of garbage in each blob file in the MANIFEST.
+* Integrated BlobDB now supports Merge with base values (Put/Delete etc.).
+* RemoteCompaction supports sub-compaction; the job_id in the user interface has changed from `int` to `uint64_t` to support sub-compaction IDs.
+* Exposed the statistics option in the RemoteCompaction worker.
+
+### Public API change
+* Added APIs to the Customizable class to allow developers to create their own Customizable classes. Created the utilities/customizable_util.h file to contain helper methods for developing new Customizable classes.
+* Changed the signature of SecondaryCache::Name(). Made SecondaryCache customizable and added the SecondaryCache::CreateFromString method.
+
+## 6.22.0 (2021-06-18)
+### Behavior Changes
+* Added two additional tickers, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH and MEMTABLE_GARBAGE_BYTES_AT_FLUSH. These stats can be used to estimate the ratio of "garbage" (outdated) bytes in the memtable that are discarded at flush time.
+* Added API comments clarifying the safe usage of Disable/EnableManualCompaction and EventListener callbacks for compaction.
+### Bug Fixes
+* fs_posix.cc GetFreeSpace() always reported the disk space available to root, even when running as non-root. Linux defaults often have disk mounts with 5 to 10 percent of total space reserved only for root, so out-of-space could result for non-root users.
+* Subcompactions are now disabled when user-defined timestamps are used, since the subcompaction boundary picking logic is currently not timestamp-aware, which could lead to incorrect results when different subcompactions process keys that only differ by timestamp.
+* Fixed an issue where `DeleteFilesInRange()` may cause an ongoing compaction to report a corruption exception, or to assert in debug builds. We found no actual data loss or corruption.
+* Fixed confusingly duplicated output in LOG for periodic stats ("DUMPING STATS"), including "Compaction Stats" and "File Read Latency Histogram By Level".
+* Fixed performance bugs in background gathering of block cache entry statistics that could consume a lot of CPU when there are many column families with a shared block cache.
+
+### New Features
+* Marked the Ribbon filter and optimize_filters_for_memory features as production-ready, each enabling memory savings for Bloom-like filters. Use `NewRibbonFilterPolicy` in place of `NewBloomFilterPolicy` to use Ribbon filters instead of Bloom, or `ribbonfilter` in place of `bloomfilter` in the configuration string (see the sketch after this section).
+* Allow `DBWithTTL` to use the `DeleteRange` API just like other DBs. `DeleteRangeCF()`, which executes `WriteBatchInternal::DeleteRange()`, has been added to the handler in `DBWithTTLImpl::Write()` to implement it.
+* Added BlockBasedTableOptions.prepopulate_block_cache. If enabled, it prepopulates the block cache at flush time with warm/hot data blocks which are already in memory. On a flush, the data blocks that are in memory (in memtables) get flushed to the device. If using Direct IO, additional IO is incurred to read this data back into memory again, which is avoided by enabling this option; it also helps with distributed file systems. More details in include/rocksdb/table.h.
+* Added a `cancel` field to `CompactRangeOptions`, allowing individual in-process manual range compactions to be cancelled.
+* Added BlobMetaData to ColumnFamilyMetaData to return information about blob files.
+
+### Public API change
+* Added the GetAllColumnFamilyMetaData API to retrieve the ColumnFamilyMetaData about all column families.
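+
+A minimal sketch of opting into Ribbon filters as described in the 6.22.0 notes above — the bits-per-key value and DB path are illustrative assumptions, not taken from the changelog:
+
+```cpp
+#include <cassert>
+
+#include "rocksdb/db.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/table.h"
+
+using namespace rocksdb;
+
+int main() {
+  BlockBasedTableOptions table_options;
+  // Ribbon in place of Bloom: similar accuracy, less filter memory.
+  table_options.filter_policy.reset(NewRibbonFilterPolicy(9.9));
+
+  Options options;
+  options.create_if_missing = true;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+  DB* db = nullptr;
+  Status s = DB::Open(options, "/tmp/ribbon_demo", &db);
+  assert(s.ok());
+  delete db;
+  return 0;
+}
+```
+
+The equivalent configuration-string form mentioned above would swap `bloomfilter` for `ribbonfilter` inside a `block_based_table_factory` options string.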
+
+## 6.21.0 (2021-05-21)
+### Bug Fixes
+* Fixed a bug in handling file rename errors in distributed/network file systems when the server succeeds but the client returns an error. The bug could cause the CURRENT file to point to a non-existing MANIFEST file, so that the DB cannot be opened.
+* Fixed a bug where ingested files were written with incorrect boundary key metadata. In rare cases this could have led to a level's files being wrongly ordered and queries for the boundary keys returning wrong results.
+* Fixed a data race between insertion into memtables and the retrieval of the DB properties `rocksdb.cur-size-active-mem-table`, `rocksdb.cur-size-all-mem-tables`, and `rocksdb.size-all-mem-tables`.
+* Fixed a false-positive alert when recovering from the WAL file. Avoid reporting "SST file is ahead of WAL" on a newly created empty column family if the previous WAL file is corrupted.
+* Fixed a bug where `GetLiveFiles()` output included a non-existent file called "OPTIONS-000000". Backups and checkpoints, which use `GetLiveFiles()`, failed on DBs impacted by this bug. Read-write DBs were impacted when the latest OPTIONS file failed to write and `fail_if_options_file_error == false`. Read-only DBs were impacted when no OPTIONS files existed.
+* Handle the return codes of io_uring_submit_and_wait() and io_uring_wait_cqe().
+* In the IngestExternalFile() API, only try to sync the ingested file if the file is linked and the FileSystem/Env supports reopening a writable file.
+* Fixed a bug where `AdvancedColumnFamilyOptions.max_compaction_bytes` was under-calculated for manual compaction (`CompactRange()`). Manual compaction is split into multiple compactions if the compaction size exceeds `max_compaction_bytes`. The bug created much larger compactions whose size exceeded the user setting. On the other hand, a larger manual compaction size can increase subcompaction parallelism; you can tune that by setting `max_compaction_bytes`.
+
+### Behavior Changes
+* Due to the fix of the false-positive "SST file is ahead of WAL" alert, all CFs with no SST file (empty CFs) will bypass the consistency check. We fixed a false positive but introduced a very rare true negative, which will be triggered under the following conditions: a CF whose last few queries are delete operations that result in an empty CF (those are flushed to an SST file and a compaction is triggered which combines this file and all other SST files, generating an empty CF; or there is another reason to write a manifest entry for this CF after a flush that generates no SST file from an empty CF), the deletion entries are logged in a WAL, and this WAL is corrupted while the CF's log number points to the next WAL (due to the flush). In that case, the DB can only recover to the point without these trailing deletions, leaving the DB in an inconsistent state.
+
+### New Features
+* Added a new option, allow_stall, passed during instance creation of WriteBufferManager. When allow_stall is set, WriteBufferManager will stall all writers shared across multiple DBs and column families if memory usage goes beyond the specified WriteBufferManager::buffer_size (a soft limit). The stall will be cleared when memory is freed after flush and memory usage goes back down below buffer_size.
+* Allow `CompactionFilter`s to apply in more table file creation scenarios such as flush and recovery. For compatibility, `CompactionFilter`s by default apply during compaction. Users can customize this behavior by overriding `CompactionFilterFactory::ShouldFilterTableFileCreation()`.
+* Added more fields to FilterBuildingContext with LSM details, for custom filter policies that vary behavior based on where they are in the LSM-tree.
+* Added DB::Properties::kBlockCacheEntryStats for querying statistics on what percentage of the block cache is used by various kinds of blocks, etc., using DB::GetProperty and DB::GetMapProperty. The same information is now dumped to the info LOG periodically according to `stats_dump_period_sec`.
+* Added an experimental Remote Compaction feature, which allows the user to run compaction on a different host or process. The feature is still under development and currently only works for some basic use cases. The interface will be changed without backward/forward compatibility support.
+* RocksDB now validates the total number of entries read in flush and compares it with the counter of entries inserted. If flush_verify_memtable_count = true (default), the flush will fail on a mismatch; otherwise, the mismatch is only logged to the info logs.
+* Added `TableProperties::num_filter_entries`, which can be used with `TableProperties::filter_size` to calculate the effective bits per filter entry (unique user key or prefix) for a table file.
+
+### Performance Improvements
+* BlockPrefetcher is used by iterators to prefetch data if they anticipate more data to be used in the future. It is enabled implicitly by RocksDB. Added a change to take the read pattern into account, checking whether reads are sequential. This disables prefetching for random reads in MultiGet and iterators, as readahead_size is increased exponentially, doing large prefetches.
+
+### Public API change
+* Removed a parameter from TableFactory::NewTableBuilder, which should not be called by user code because TableBuilder is not a public API.
+* Removed the unused structure `CompactionFilterContext`.
+* The `skip_filters` parameter to SstFileWriter is now considered deprecated. Use `BlockBasedTableOptions::filter_policy` to control the generation of filters.
+* ClockCache is known to have bugs that could lead to crash or corruption, so it should not be used until fixed. Use NewLRUCache instead.
+* Added a new pure virtual function `ApplyToAllEntries` to `Cache`, to replace `ApplyToAllCacheEntries`. Custom `Cache` implementations must add an implementation. Because this function is for gathering statistics, an empty implementation could be acceptable for some applications.
+* Added the ObjectRegistry to the ConfigOptions class. This registry instance will be used to find any customizable loadable objects during initialization.
+* Expanded the ObjectRegistry functionality to allow nested ObjectRegistry instances. Added methods to register a set of functions with the registry/library as a group.
+* Deprecated backupable_db.h and BackupableDBOptions in favor of new versions with appropriate names: backup_engine.h and BackupEngineOptions. Old API compatibility is preserved.
+
+### Default Option Change
+* When options.arena_block_size <= 0 (default value 0), still use write_buffer_size / 8 but cap it to 1MB. Too large an allocation size might not be friendly to the allocator and might cause performance issues in extreme cases.
+
+### Build
+* By default, try to build with liburing. For make, if ROCKSDB_USE_IO_URING is not set, it is treated as enabled, which means RocksDB will try to build with liburing. Users can disable it with ROCKSDB_USE_IO_URING=0. For cmake, added WITH_LIBURING to control it, with default on.
+
+## 6.20.0 (2021-04-16)
+### Behavior Changes
+* `ColumnFamilyOptions::sample_for_compression` now takes effect for the creation of all block-based tables. Previously it only took effect for block-based tables created by flush.
+* `CompactFiles()` can no longer compact files from a lower level to an upper level, which had the risk of corrupting the DB (details: #8063). The validation has also been added to all compactions.
+* Fixed some cases in which DB::OpenForReadOnly() could write to the filesystem. If you want a Logger with a read-only DB, you must now set DBOptions::info_log yourself, such as using CreateLoggerFromOptions().
+* get_iostats_context() will never return nullptr. If thread-local support is not available, and the user does not opt out of the iostats context, then compilation will fail. The same applies to the perf context as well.
+* Added support for WriteBatchWithIndex::NewIteratorWithBase when overwrite_key=false. Previously, this combination was not supported and would assert or return nullptr.
+* Improved the behavior of WriteBatchWithIndex for Merge operations. Now more operations may be stored in order to return the correct merged result.
+
+### Bug Fixes
+* Use thread-safe `strerror_r()` to get error messages.
+* Fixed a potential hang in shutdown for a DB whose `Env` has the high-pri thread pool disabled (`Env::GetBackgroundThreads(Env::Priority::HIGH) == 0`).
+* Made BackupEngine thread-safe and added documentation comments to clarify what is safe for multiple BackupEngine objects accessing the same backup directory.
+* Fixed a crash (divide by zero) when a compression dictionary is applied to a file containing only range tombstones.
+* Fixed a backward iteration bug with partitioned filters enabled: not including the prefix of the last key of the previous filter partition in the current filter partition could cause wrong iteration results.
+* Fixed a bug that allowed `DBOptions::max_open_files` to be set with a non-negative integer with `ColumnFamilyOptions::compaction_style = kCompactionStyleFIFO`.
+
+### Performance Improvements
+* On the ARM platform, use `yield` instead of `wfe` to relax the CPU, gaining better performance.
+
+### Public API change
+* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see the `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead.
+* Updated DB::StartIOTrace and removed the Env object from the arguments, as it is redundant: DB already has an Env object that is passed down to IOTracer::StartIOTrace.
+* Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`.
+* Extended the file_checksum_dump ldb command and the DB::GetLiveFilesChecksumInfo API for integrated BlobDB to get the checksums of blob files along with SST files.
+
+### New Features
+* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true.
+* Added BackupEngine support for integrated BlobDB, with blob files shared between backups when table files are shared. Because of current limitations, blob files always use the kLegacyCrc32cAndFileSize naming scheme, and incremental backups must read and checksum all blob files in a DB, even for files that are already backed up.
+* Added an optional output parameter to BackupEngine::CreateNewBackup(WithMetadata) to return the BackupID of the new backup.
+* Added BackupEngine::GetBackupInfo / GetLatestBackupInfo for querying individual backups.
+* Made the Ribbon filter a long-term supported feature in terms of the SST schema (compatible with version >= 6.15.0), though the API for enabling it is expected to change.
+
+## 6.19.0 (2021-03-21)
+### Bug Fixes
+* Fixed the truncation error found in APIs/tools when dumping block-based SST files in a human-readable format. After the fix, the block-based table can be fully dumped as a readable file.
+* When hitting a write slowdown condition, no write delay (previously 1 millisecond) is imposed until `delayed_write_rate` is actually exceeded, with an initial burst allowance of 1 millisecond worth of bytes. Also, beyond the initial burst allowance, `delayed_write_rate` is now more strictly enforced, especially with multiple column families.
+
+### Public API change
+* Changed the default `BackupableDBOptions::share_files_with_checksum` to `true` and deprecated `false` because of the potential for data loss. Note that accepting this change in behavior can temporarily increase backup data usage because files are not shared between backups using the two different settings. Also removed the obsolete option kFlagMatchInterimNaming.
+* Added a new option BlockBasedTableOptions::max_auto_readahead_size. RocksDB does auto-readahead for iterators on noticing more than two reads for a table file if the user doesn't provide readahead_size. The readahead starts at 8KB and doubles on every additional read up to max_auto_readahead_size, and max_auto_readahead_size can now be configured dynamically as well. Based on experiments, a 256 KB readahead size provides the best performance for auto-readahead; the experiment data is in PR #3282. If the value is set to 0, then no automatic prefetching will be done by RocksDB. Also, changing the value will only affect files opened after the change.
+* Added support to extend the DB::VerifyFileChecksums API to also verify blob file checksums.
+* When using the new BlobDB, the amount of data written by flushes/compactions is now broken down into table files and blob files in the compaction statistics; namely, Write(GB) denotes the amount of data written to table files, while Wblob(GB) means the amount of data written to blob files.
+* New default BlockBasedTableOptions::format_version=5 to enable the new Bloom filter implementation by default, compatible with RocksDB versions >= 6.6.0.
+* Added a new SetBufferSize API to WriteBufferManager to allow dynamic management of the memory allotted to all write buffers. This allows user code to adjust the memory monitoring provided by WriteBufferManager as process memory needs change and datasets grow and shrink.
+* Clarified the required semantics of Read() functions in FileSystem and Env APIs. Please ensure any custom implementations are compliant.
+* For the new integrated BlobDB implementation, compaction statistics now include the amount of data read from blob files during compaction (due to garbage collection or compaction filters). Write amplification metrics have also been extended to account for data read from blob files.
+* Added EqualWithoutTimestamp() to Comparator.
+* Extended support to track blob files in SstFileManager whenever a blob file is created/deleted. Blob files will be scheduled for deletion via SstFileManager, and SstFileManager will now take blob files into account while calculating size and space limits, along with SST files.
+* Added new Append and PositionedAppend APIs with checksum handoff to the legacy Env.
+
+### New Features
+* Support compaction filters for the new implementation of BlobDB. Added `FilterBlobByKey()` to `CompactionFilter`. Subclasses can override this method so that compaction filters can determine whether the actual blob value has to be read during compaction. Use the new `kUndetermined` in `CompactionFilter::Decision` to indicate that further action is necessary for the compaction filter to make a decision.
+* Added support to extend the retrieval of checksums for blob files from the MANIFEST when checkpointing. During backup, RocksDB can detect corruption in blob files during file copies.
+* Added new options for db_bench --benchmarks: flush, waitforcompaction, compact0, compact1.
+* Added an option to BackupEngine::GetBackupInfo to include the name and size of each backed-up file. Especially in the presence of file sharing among backups, this offers detailed insight into backup space usage.
+* Enabled backward iteration on keys with user-defined timestamps.
+* Added statistics and info log entries for the error handler: counters for bg error, bg io error, bg retryable io error, auto resume count, auto resume total retry number, and auto resume success; a histogram for the auto resume retry count in each recovery call. Note that each auto resume attempt will have one or multiple retries.
+
+### Behavior Changes
+* During flush, only a WAL sync retryable IO error is mapped to a hard error, which will stall the writes. When the WAL is used but only an SST file write has a retryable IO error, it will be mapped to a soft error and writes will not be affected.
+
+## 6.18.0 (2021-02-19)
+### Behavior Changes
+* Previously, when a retryable IO error occurred during compaction, it was mapped to a soft error and set as the BG error; however, auto resume was not called to clean the soft error since compaction reschedules by itself. With this change, when a retryable IO error occurs during compaction, the BG error is not set. The user will be informed of the error via EventHelper.
+* Introduced a new trace file format for query tracing and replay; the trace file version is bumped up to 0.2. A payload map is added as the first portion of the payload, so we will not have backward compatibility issues when adding new entries to trace records. Added iterator_upper_bound and iterator_lower_bound to the Seek and SeekForPrev tracing functions, as new payload members for iterator tracing.
+
+### New Features
+* Added support for key-value integrity protection in live updates, from the user buffers provided to `WriteBatch` through the write to RocksDB's in-memory update buffer (memtable). This is intended to detect some cases of in-memory data corruption, due to either software or hardware errors. Users can enable protection by constructing their `WriteBatch` with `protection_bytes_per_key == 8`.
+* Added support for updating the `full_history_ts_low` option in manual compaction, which is for old timestamp data GC.
+* Added a mechanism for using Makefile to build external plugin code into the RocksDB libraries/binaries. This intends to simplify compatibility and distribution for plugins (e.g., special-purpose `FileSystem`s) whose source code resides outside the RocksDB repo. See "plugin/README.md" for developer details, and "PLUGINS.md" for a listing of available plugins.
+* Added memory pre-fetching for the experimental Ribbon filter, which especially optimizes performance with batched MultiGet.
+* A new, experimental version of BlobDB (key-value separation) is now available. The new implementation is integrated into the RocksDB core, i.e. it is accessible via the usual `rocksdb::DB` API, as opposed to the separate `rocksdb::blob_db::BlobDB` interface used by the earlier version, and can be configured on a per-column family basis using the configuration options `enable_blob_files`, `min_blob_size`, `blob_file_size`, `blob_compression_type`, `enable_blob_garbage_collection`, and `blob_garbage_collection_age_cutoff` (see the configuration sketch after this section). It extends RocksDB's consistency guarantees to blobs, and offers more features and better performance. Note that some features, most notably `Merge`, compaction filters, and backup/restore are not yet supported, and there is no support for migrating a database created by the old implementation.
+
+### Bug Fixes
+* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details.
+* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees.
+* Fixed a bug where `WRITE_PREPARED` and `WRITE_UNPREPARED` TransactionDB `MultiGet()` may return uncommitted data with a snapshot.
+* In DB::OpenForReadOnly, if any error happened while checking the Manifest file path, it was overridden by Status::NotFound. This has been fixed, and the actual error is now returned.
+
+### Public API Change
+* Added an "only_mutable_options" flag to the ConfigOptions. When this flag is "true", the Configurable functions and convenience methods (such as GetDBOptionsFromString) will only deal with options that are marked as mutable: only options marked as mutable can be configured (otherwise a Status::InvalidArgument will be returned), and options not marked as mutable will not be returned or compared. The default is "false", meaning to compare all options.
+* Added new Append and PositionedAppend APIs to FileSystem to bring the data verification information (data checksum information) from the upper layer (e.g., WritableFileWriter) to the storage layer. In this way, a customized FileSystem is able to verify the correctness of data being written to the storage on time. Added checksum_handoff_file_types to DBOptions. Users can use this option to control which file types (currently supported file types: kWALFile, kTableFile, kDescriptorFile) should use the new Append and PositionedAppend APIs to hand off the verification information. Currently, RocksDB only uses crc32c to calculate the checksum for write handoff.
+* Added an option, `CompressionOptions::max_dict_buffer_bytes`, to limit the in-memory buffering for selecting samples for generating/training a dictionary. The limit is currently loosely adhered to.
+
+
+## 6.17.0 (2021-01-15)
+### Behavior Changes
+* When verifying a full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created.
+* Since RocksDB does not continue writing to the same file if a file write fails for any reason, a file-scope write IO error is treated the same as a retryable IO error. More information about the error handling of file-scope IO errors is included in `ErrorHandler::SetBGError`.
+
+### Bug Fixes
+* Versions older than 6.15 cannot decode the VersionEdits `WalAddition` and `WalDeletion`; fixed this by changing their encoded format to be ignorable by older versions.
+* Fixed a race condition between DB startups and shutdowns in managing the periodic background worker threads. One effect of this race condition could be the process being terminated.
+
+### Public API Change
+* Added a public API WriteBufferManager::dummy_entries_in_cache_usage() which reports the size of dummy entries stored in the cache (passed to WriteBufferManager). Dummy entries are used to account for DataBlocks.
+* Added a SystemClock class that contains the time-related methods from Env. The original methods in Env may be deprecated in a future release. This class will allow easier testing, development, and expansion of time-related features.
+* Added public APIs GetRocksBuildProperties and GetRocksBuildInfoAsString to get properties about the current build. These properties may include settings related to the GIT settings (branch, timestamp). This change also sets the "build date" based on the GIT properties, rather than the actual build time, thereby enabling more reproducible builds.
+
+## 6.16.0 (2020-12-18)
+### Behavior Changes
+* Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation.
+
+### Bug Fixes
+* Truncated WALs ending in incomplete records can no longer produce gaps in the recovered data when `WALRecoveryMode::kPointInTimeRecovery` is used. Gaps are still possible when WALs are truncated exactly on record boundaries; for complete protection, users should enable `track_and_verify_wals_in_manifest`.
+* Fixed a bug where compressed blocks read by MultiGet are not inserted into the compressed block cache when use_direct_reads = true.
+* Fixed the issue of full scanning on obsolete files when there are too many outstanding compactions with ConcurrentTaskLimiter enabled.
+* Fixed the logic of populating the native data structure for `read_amp_bytes_per_bit` during OPTIONS file parsing on big-endian architectures. Without this fix, the original code introduced in PR #7659, when running on a big-endian machine, could mistakenly store read_amp_bytes_per_bit (a uint32) in little-endian format; future accesses to `read_amp_bytes_per_bit` would then give wrong values. Little-endian architectures are not affected.
+* Fixed prefix extractor issues with timestamps.
+* Fixed a bug in atomic flush: in two-phase commit mode, the minimum WAL log number to keep was incorrect.
+* Fixed a bug related to checkpoints in PR #7789: if there are multiple column families, and the checkpoint is not opened as read-only, then in rare cases data loss may happen in the checkpoint. Since the backup engine relies on checkpoints, it may also be affected.
+* When ldb --try_load_options is used with the --column_family option, the ColumnFamilyOptions for the specified column family were not loaded from the OPTIONS file. Fixed it so they are loaded from OPTIONS and then overridden with command-line overrides.
+
+### New Features
+* The user-defined timestamp feature supports `CompactRange` and `GetApproximateSizes`.
+* Support getting aggregated table properties (kAggregatedTableProperties and kAggregatedTablePropertiesAtLevel) with DB::GetMapProperty, for easier access to the data in a structured format.
+* The experimental option BlockBasedTableOptions::optimize_filters_for_memory now works with the experimental Ribbon filter (as well as the Bloom filter).
+
+### Public API Change
+* Deprecated the public but rarely-used FilterBitsBuilder::CalculateNumEntry, which is replaced with ApproximateNumEntries taking a size_t parameter and returning size_t.
+* To improve portability, the functions `Env::GetChildren` and `Env::GetChildrenFileAttributes` will no longer return entries for the special directories `.` or `..`.
+* Added a new option `track_and_verify_wals_in_manifest`. If `true`, the log numbers and sizes of the synced WALs are tracked in MANIFEST; then, during DB recovery, if a synced WAL is missing from disk, or the WAL's size does not match the recorded size in MANIFEST, an error will be reported and the recovery will be aborted. Note that this option does not work with the secondary instance.
+* `rocksdb_approximate_sizes` and `rocksdb_approximate_sizes_cf` in the C API now require an error pointer (`char** errptr`) for receiving any error.
+* All overloads of DB::GetApproximateSizes now return Status, so that any failure to obtain the sizes is indicated to the caller.
+
+## 6.15.0 (2020-11-13)
+### Bug Fixes
+* Fixed a bug in the following combination of features: indexes with user keys (`format_version >= 3`), indexes are partitioned (`index_type == kTwoLevelIndexSearch`), and some index partitions are pinned in memory (`BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache`). The bug could cause keys to be truncated when read from the index, leading to wrong read results or other unexpected behavior.
+* Fixed a bug when indexes are partitioned (`index_type == kTwoLevelIndexSearch`), some index partitions are pinned in memory (`BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache`), and partition reads could be mixed between block cache and directly from the file (e.g., with `enable_index_compression == 1` and `mmap_read == 1`, partitions that were stored uncompressed due to a poor compression ratio would be read directly from the file via mmap, while partitions that were stored compressed would be read from block cache). The bug could cause index partitions to be mistakenly considered empty during reads, leading to wrong read results.
+* Since 6.12, memtable lookup should report an unrecognized value_type as corruption (#7121).
+* Since 6.14, fixed a false positive flush/compaction `Status::Corruption` failure when `paranoid_file_checks == true` and range tombstones were written to the compaction output files.
+* Since 6.14, fixed a bug that could cause a stalled write to crash with a mix of slowdown and no_slowdown writes (`WriteOptions.no_slowdown=true`).
+* Fixed a bug which caused a hang when closing a DB if refit level is set, in opt builds. It happened because ContinueBackgroundWork() was called inside an assert statement, which is a no-op. It was introduced in 6.14.
+* Fixed a bug that caused Get() to return an incorrect result when a key's merge operand was applied twice. This can occur if the thread performing Get() runs concurrently with a background flush thread and another thread writing to the MANIFEST file (PR6069).
+* Reverted a behavior change silently introduced in 6.14.2, in which the effects of the `ignore_unknown_options` flag (used in option parsing/loading functions) changed.
+* Reverted a behavior change silently introduced in 6.14, in which options parsing/loading functions began returning `NotFound` instead of `InvalidArgument` for option names not available in the present version.
+* Fixed MultiGet bugs where it did not return valid data when user-defined timestamps are in use.
+* Fixed a potential bug caused by evaluating `TableBuilder::NeedCompact()` before `TableBuilder::Finish()` in the compaction job. For example, the `NeedCompact()` method of the `CompactOnDeletionCollector` returned by the built-in `CompactOnDeletionCollectorFactory` requires `BlockBasedTable::Finish()` to return the correct result. The bug could cause a compaction-generated file not to be marked for future compaction based on deletion ratio.
+* Fixed a seek issue when a prefix extractor is used together with timestamps.
+* Fixed a bug where BlockBasedTableOptions::read_amp_bytes_per_bit was encoded and parsed as a 64-bit integer.
+* Fixed a recovery corner-case bug; details in PR7621.
+
+### Public API Change
+* Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options.
+* Add a new API `DB::VerifyFileChecksums` to verify SST file checksums against the corresponding entries in the MANIFEST, if present. The current implementation scans and recomputes the file checksums.
+
+### Behavior Changes
+* The dictionary compression settings specified in `ColumnFamilyOptions::compression_opts` now additionally affect files generated by flush and compaction to non-bottommost levels. Previously those settings at most affected files generated by compaction to the bottommost level, depending on whether `ColumnFamilyOptions::bottommost_compression_opts` overrode them. Users who relied on dictionary compression settings in `ColumnFamilyOptions::compression_opts` affecting only the bottommost level can keep the behavior by moving their dictionary settings to `ColumnFamilyOptions::bottommost_compression_opts` and setting its `enabled` flag.
+* When the `enabled` flag is set in `ColumnFamilyOptions::bottommost_compression_opts`, those compression options now take effect regardless of the value of `ColumnFamilyOptions::bottommost_compression`. Previously, those compression options only took effect when `ColumnFamilyOptions::bottommost_compression != kDisableCompressionOption`. Now, they additionally take effect when `ColumnFamilyOptions::bottommost_compression == kDisableCompressionOption` (such a setting causes the bottommost compression type to fall back to `ColumnFamilyOptions::compression_per_level` if configured, and otherwise fall back to `ColumnFamilyOptions::compression`).
+
+### New Features
+* An EXPERIMENTAL new Bloom alternative that saves about 30% space compared to Bloom filters, with about 3-4x construction time and similar query times, is available using NewExperimentalRibbonFilterPolicy. A minimal configuration sketch follows.
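+A minimal sketch (assumed usage; the bits-per-key value is illustrative) of enabling the experimental Ribbon filter on a column family:
+```cpp
+#include <rocksdb/filter_policy.h>
+#include <rocksdb/options.h>
+#include <rocksdb/table.h>
+
+// Build Ribbon filters instead of Bloom filters for new SST files;
+// 9.9 "Bloom-equivalent" bits per key is just an example setting.
+rocksdb::Options MakeRibbonOptions() {
+  rocksdb::BlockBasedTableOptions table_options;
+  table_options.filter_policy.reset(
+      rocksdb::NewExperimentalRibbonFilterPolicy(9.9));
+  rocksdb::Options options;
+  options.table_factory.reset(
+      rocksdb::NewBlockBasedTableFactory(table_options));
+  return options;
+}
+```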
+
+## 6.14 (2020-10-09)
+### Bug fixes
+* Fixed a bug where, after a `CompactRange()` with `CompactRangeOptions::change_level` set failed due to a conflict in the level-change step, all subsequent calls to `CompactRange()` with `CompactRangeOptions::change_level` set would incorrectly fail with a `Status::NotSupported("another thread is refitting")` error.
+* Fixed a bug where the bottommost-level compaction could still be a trivial move even if `BottommostLevelCompaction::kForce` or `kForceOptimized` was set.
+
+### Public API Change
+* The methods to create and manage EncryptedEnv have been changed. The EncryptionProvider is now passed to NewEncryptedEnv as a shared pointer, rather than a raw pointer. Similarly, the CTREncryptedProvider now takes a shared pointer, rather than a reference, to a BlockCipher. CreateFromString methods have been added to BlockCipher and EncryptionProvider to provide a single API by which different ciphers and providers can be created, respectively.
+* The internal classes (CTREncryptionProvider, ROT13BlockCipher, CTRCipherStream) associated with the EncryptedEnv have been moved out of the public API. To create a CTREncryptionProvider, one can either use EncryptionProvider::NewCTRProvider, or EncryptionProvider::CreateFromString("CTR"). To create a new ROT13BlockCipher, one can either use BlockCipher::NewROT13Cipher or BlockCipher::CreateFromString("ROT13").
+* The EncryptionProvider::AddCipher method has been added to allow keys to be added to an EncryptionProvider. This API will allow future providers to support multiple cipher keys.
+* Add a new option "allow_data_in_errors". When this option is set, users opt in to error messages containing corrupted keys/values. Corrupt keys and values will be logged in messages, logs, status, etc., giving users useful information about the affected data. By default the option is false, so user data is redacted from logs, messages, and status.
+* AdvancedColumnFamilyOptions::force_consistency_checks is now true by default, for more proactive DB corruption detection at virtually no cost (estimated two extra CPU cycles per million on a major production workload). Corruptions reported by these checks now mention "force_consistency_checks" in case a false positive corruption report is suspected and the option needs to be disabled (unlikely). Since existing column families have a saved setting for force_consistency_checks, only new column families will pick up the new default.
+
+### General Improvements
+* The settings of DBOptions and ColumnFamilyOptions are now managed by Configurable objects (see New Features). The same convenience methods to configure these options still exist, but the backend implementation has been unified under a common implementation.
+
+### New Features
+
+* Methods to configure, serialize, and compare objects -- such as TableFactory -- are exposed directly through the Configurable base class (from which these objects inherit). This change will allow for better and more thorough configuration management and retrieval in the future. The options for a Configurable object can be set via the ConfigureFromMap, ConfigureFromString, or ConfigureOption methods. The serialized version of the options of an object can be retrieved via the GetOptionString, ToString, or GetOption methods. The list of options supported by an object can be obtained via the GetOptionNames method.
The "raw" object (such as the BlockBasedTableOption) for an option may be retrieved via the GetOptions method. Configurable options can be compared via the AreEquivalent method. The settings within a Configurable object may be validated via the ValidateOptions method. The object may be intialized (at which point only mutable options may be updated) via the PrepareOptions method. +* Introduce options.check_flush_compaction_key_order with default value to be true. With this option, during flush and compaction, key order will be checked when writing to each SST file. If the order is violated, the flush or compaction will fail. +* Added is_full_compaction to CompactionJobStats, so that the information is available through the EventListener interface. +* Add more stats for MultiGet in Histogram to get number of data blocks, index blocks, filter blocks and sst files read from file system per level. +* SST files have a new table property called db_host_id, which is set to the hostname by default. A new option in DBOptions, db_host_id, allows the property value to be overridden with a user specified string, or disable it completely by making the option string empty. +* Methods to create customizable extensions -- such as TableFactory -- are exposed directly through the Customizable base class (from which these objects inherit). This change will allow these Customizable classes to be loaded and configured in a standard way (via CreateFromString). More information on how to write and use Customizable classes is in the customizable.h header file. + +## 6.13 (2020-09-12) +### Bug fixes +* Fix a performance regression introduced in 6.4 that makes a upper bound check for every Next() even if keys are within a data block that is within the upper bound. +* Fix a possible corruption to the LSM state (overlapping files within a level) when a `CompactRange()` for refitting levels (`CompactRangeOptions::change_level == true`) and another manual compaction are executed in parallel. +* Sanitize `recycle_log_file_num` to zero when the user attempts to enable it in combination with `WALRecoveryMode::kTolerateCorruptedTailRecords`. Previously the two features were allowed together, which compromised the user's configured crash-recovery guarantees. +* Fix a bug where a level refitting in CompactRange() might race with an automatic compaction that puts the data to the target level of the refitting. The bug has been there for years. +* Fixed a bug in version 6.12 in which BackupEngine::CreateNewBackup could fail intermittently with non-OK status when backing up a read-write DB configured with a DBOptions::file_checksum_gen_factory. +* Fix useless no-op compactions scheduled upon snapshot release when options.disable-auto-compactions = true. +* Fix a bug when max_write_buffer_size_to_maintain is set, immutable flushed memtable destruction is delayed until the next super version is installed. A memtable is not added to delete list because of its reference hold by super version and super version doesn't switch because of empt delete list. So memory usage keeps on increasing beyond write_buffer_size + max_write_buffer_size_to_maintain. +* Avoid converting MERGES to PUTS when allow_ingest_behind is true. +* Fix compression dictionary sampling together with `SstFileWriter`. Previously, the dictionary would be trained/finalized immediately with zero samples. Now, the whole `SstFileWriter` file is buffered in memory and then sampled. 
+* Fix a bug with `avoid_unnecessary_blocking_io=1` and creating backups (BackupEngine::CreateNewBackup) or checkpoints (Checkpoint::Create). With this setting and WAL enabled, these operations could randomly fail with non-OK status.
+* Fix a bug in which bottommost compaction continued to advance the underlying InternalIterator to skip tombstones even after shutdown.
+
+### New Features
+* A new field `std::string requested_checksum_func_name` is added to `FileChecksumGenContext`, which enables the checksum factory to create generators for a suite of different functions.
+* Added a new subcommand, `ldb unsafe_remove_sst_file`, which removes a lost or corrupt SST file from a DB's metadata. This command involves data loss and must not be used on a live DB.
+
+### Performance Improvements
+* Reduce the number of threads for multiple DB instances by reusing one global thread for statistics dumping and persisting.
+* Reduce write-amp in heavy write bursts in the `kCompactionStyleLevel` compaction style with `level_compaction_dynamic_level_bytes` set.
+* BackupEngine incremental backups no longer read DB table files that are already saved to a shared part of the backup directory, unless `share_files_with_checksum` is used with `kLegacyCrc32cAndFileSize` naming (discouraged).
+ * For `share_files_with_checksum`, we are confident there is no regression (vs. pre-6.12) in detecting DB or backup corruption at backup creation time, mostly because the old design did not leverage this extra checksum computation for detecting inconsistencies at backup creation time.
+ * For `share_table_files` without "checksum" (not recommended), there is a regression in detecting fundamentally unsafe use of the option, greatly mitigated by file size checking (under "Behavior Changes"). Almost no reason to use `share_files_with_checksum=false` should remain.
+ * `DB::VerifyChecksum` and `BackupEngine::VerifyBackup` with checksum checking are still able to catch corruptions that `CreateNewBackup` does not.
+
+### Public API Change
+* Expose kTypeDeleteWithTimestamp in EntryType and update GetEntryType() accordingly.
+* Added file_checksum and file_checksum_func_name to TableFileCreationInfo, which can pass the table file checksum information through the OnTableFileCreated callback during flush and compaction.
+* A warning is added to the `DB::DeleteFile()` API describing its known problems and deprecation plan.
+* Add a new stats level, StatsLevel::kExceptTickers (PR7329), to exclude tickers even if the application passes a non-null Statistics object.
+* Added a new status code IOStatus::IOFenced() for the Env/FileSystem to indicate that writes from this instance are fenced off. Like any other background error, this error is returned to the user in Put/Merge/Delete/Flush calls and can be checked using Status::IsIOFenced().
+
+### Behavior Changes
+* The default return status of the file abstraction `FSRandomAccessFile.Prefetch()` is changed from `OK` to `NotSupported`. If a user-inherited file class does not implement prefetch, RocksDB will create an internal prefetch buffer to improve read performance.
+* When a retryable IO error happens during Flush (manifest write errors excluded) and WAL is disabled, it was originally mapped to kHardError; now it is mapped to a soft error, so the DB will not stall writes unless the memtable is full. At the same time, when auto resume is triggered to recover from the retryable IO error during Flush, SwitchMemtable is not called, to avoid generating too many small immutable memtables. If WAL is enabled, behavior is unchanged. A sketch of the manual counterpart, `DB::Resume()`, follows.
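+  A minimal sketch (assumed usage) of manually recovering from a background error after a failed flush; `DB::Resume()` is the manual counterpart of the auto-resume behavior described above:
+  ```cpp
+  #include <rocksdb/db.h>
+  #include <rocksdb/options.h>
+
+  // If the flush fails and leaves the DB in a background-error state,
+  // Resume() attempts to clear the error and restore read-write mode.
+  void FlushAndMaybeResume(rocksdb::DB* db) {
+    rocksdb::Status s = db->Flush(rocksdb::FlushOptions());
+    if (!s.ok()) {
+      s = db->Resume();  // a no-op if there is nothing to recover
+    }
+    (void)s;
+  }
+  ```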
+* When considering whether a table file is already backed up in a shared part of the backup directory, BackupEngine already queried the sizes of the source (DB) and pre-existing destination (backup) files. BackupEngine now uses these file sizes to detect corruption, as at least one of (a) the old backup, (b) the backup in progress, or (c) the current DB is corrupt if there is a size mismatch.
+
+### Others
+* Errors in prefetching partitioned index blocks will no longer be swallowed; they fail the query and return the IOError to the user.
+
+## 6.12 (2020-07-28)
+### Public API Change
+* Encryption file classes are now exposed for inheritance in env_encryption.h.
+* The file I/O listener is extended to cover more I/O operations. Class `EventListener` in listener.h now contains new callback functions: `OnFileFlushFinish()`, `OnFileSyncFinish()`, `OnFileRangeSyncFinish()`, `OnFileTruncateFinish()`, and `OnFileCloseFinish()`.
+* `FileOperationInfo` now reports `duration` measured by `std::chrono::steady_clock` and `start_ts` measured by `std::chrono::system_clock` instead of start and finish timestamps measured by `system_clock`. Note that `system_clock` is called before `steady_clock` in program order at operation start.
+* `DB::GetDbSessionId(std::string& session_id)` is added. `session_id` stores a unique identifier that gets reset every time the DB is opened. This DB session ID should be unique among all open DB instances on all hosts, and should be unique among re-openings of the same or other DBs. This identifier is recorded in the LOG file on the line starting with "DB Session ID:".
+* `DB::OpenForReadOnly()` now returns `Status::NotFound` when the specified DB directory does not exist. Previously the error returned depended on the underlying `Env`. This change is available in all 6.11 releases as well.
+* A parameter `verify_with_checksum` is added to `BackupEngine::VerifyBackup`; it is false by default. If it is true, `BackupEngine::VerifyBackup` verifies checksums and file sizes of backup files. Pass `false` for `verify_with_checksum` to maintain the previous behavior and performance of `BackupEngine::VerifyBackup`, which only verifies the sizes of backup files.
+
+### Behavior Changes
+* Best-efforts recovery ignores the CURRENT file completely. If the CURRENT file is missing during recovery, best-efforts recovery still proceeds with the MANIFEST file(s).
+* In best-efforts recovery, an error that is not Corruption, IOError::kNotFound, or IOError::kPathNotFound used to be silently overwritten; this is fixed by checking all non-OK cases and returning early.
+* When `file_checksum_gen_factory` is set to `GetFileChecksumGenCrc32cFactory()`, BackupEngine will compare the crc32c checksums of table files computed when creating a backup to the expected checksums stored in the DB manifest, and will fail `CreateNewBackup()` on mismatch (corruption). If `file_checksum_gen_factory` is not set or is set to any other customized factory, there is no checksum verification to detect whether SST files in a DB are corrupt when read, copied, and independently checksummed by BackupEngine.
+* When a DB sets `stats_dump_period_sec > 0`, either as the initial value for DB open or as a dynamic option change, the first stats dump is staggered within the following X seconds, where X is an integer in `[0, stats_dump_period_sec)`. Subsequent stats dumps are still spaced `stats_dump_period_sec` seconds apart. A sketch of setting this option dynamically follows.
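+  A minimal sketch (assumed usage; the period is illustrative) of changing `stats_dump_period_sec` dynamically via `DB::SetDBOptions()`:
+  ```cpp
+  #include <rocksdb/db.h>
+
+  // The first dump after this change is staggered within
+  // [0, stats_dump_period_sec) seconds; later dumps are 600 s apart.
+  void EnableStatsDump(rocksdb::DB* db) {
+    rocksdb::Status s =
+        db->SetDBOptions({{"stats_dump_period_sec", "600"}});
+    (void)s;
+  }
+  ```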
+* When the paranoid_file_checks option is true, a hash of all keys and values is generated when the SST file is written, and the values are then read back to validate the file. A corruption is signaled if the two hashes do not match.
+
+### Bug fixes
+* The compressed block cache was automatically disabled with read-only DBs by mistake. Now it is fixed: the compressed block cache will be effective with read-only DBs too.
+* Fix a bug that produced wrong iterator results if another thread finished an update and a DB flush between two statements.
+* Disable file deletion after MANIFEST write/sync failure until db re-open or Resume(), so that a subsequent re-open will not see the MANIFEST referencing deleted SSTs.
+* Fix a bug in PartitionedIndexBuilder when index_type == kTwoLevelIndexSearch: update FlushPolicy to point to the internal-key partitioner when it changes from user-key mode to internal-key mode in an index partition.
+* Make compaction report InternalKey corruption while iterating over the input.
+* Fix a bug which may cause MultiGet to be slow because it may read more data than requested, although this does not affect correctness. The bug was introduced in the 6.10 release.
+* Fail recovery and report the error once a physical log record checksum mismatch is hit while reading the MANIFEST. RocksDB should not continue processing the MANIFEST any further.
+* Fixed a bug in size-amp-triggered and periodic-triggered universal compaction, where the compression settings for the first input level were used rather than the compression settings for the output (bottom) level.
+
+### New Features
+* DB identity (`db_id`) and DB session identity (`db_session_id`) are added to table properties and stored in SST files. SST files generated from SstFileWriter and Repairer have DB identity "SST Writer" and "DB Repairer", respectively. Their DB session IDs are generated in the same way as `DB::GetDbSessionId`. The session ID for SstFileWriter (resp., Repairer) resets every time `SstFileWriter::Open` (resp., `Repairer::Run`) is called.
+* Added experimental option BlockBasedTableOptions::optimize_filters_for_memory for reducing the allocated memory size of Bloom filters (~10% savings with Jemalloc) while preserving the same general accuracy. To have an effect, the option requires format_version=5 and malloc_usable_size. Enabling this option is forward and backward compatible with existing format_version=5.
+* `BackupableDBOptions::share_files_with_checksum_naming` is added with new default behavior for naming backup files with `share_files_with_checksum`, to address performance and backup integrity issues. See API comments for details.
+* Added an auto-resume function to automatically recover the DB from background retryable IO errors. When a retryable IOError happens during flush or WAL write, the error is mapped to a hard error and the DB enters read-only mode. When a retryable IO error happens during compaction, the error is mapped to a soft error and the DB remains in write/read mode. The auto-resume function creates a thread for the DB to call DB->ResumeImpl() to attempt recovery from retryable IO errors during flush and WAL write; compaction reschedules itself if a retryable IO error happens. Auto resume may itself hit further retryable IO errors during recovery, causing the recovery to fail; retrying the auto resume may solve the issue, so max_bgerror_resume_count decides how many resume cycles are tried in total. If it is <= 0, auto resume of retryable IO errors is disabled. The default is INT_MAX, which leads to unlimited auto resume.
+bgerror_resume_retry_interval decides the time interval between two auto resumes.
+* Option `max_subcompactions` can be set dynamically using DB::SetDBOptions().
+* Added experimental ColumnFamilyOptions::sst_partitioner_factory to determine the partitioning of SST files. This helps compaction split files on interesting boundaries (key prefixes) so that propagating SST files is less write-amplifying (covering the whole key space).
+
+### Performance Improvements
+* Eliminate key copies for internal comparisons while accessing ingested block-based tables.
+* Reduce key comparisons during random access in all block-based tables.
+* BackupEngine avoids unnecessary repeated checksum computation for backing up a table file to the `shared_checksum` directory when using `share_files_with_checksum_naming = kUseDbSessionId` (new default), except on SST files generated before this version of RocksDB, which fall back on using `kLegacyCrc32cAndFileSize`.
+
+## 6.11 (2020-06-12)
+### Bug Fixes
+* Fix consistency-check errors being swallowed in some cases when options.force_consistency_checks = true.
+* Fix a possible false NotFound status from batched MultiGet using index type kHashSearch.
+* Fix corruption caused by enabling delete-triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode, along with parallel compactions. The bug could result in two parallel compactions picking the same input files, resulting in the DB resurrecting older and deleted versions of some keys.
+* Fix a use-after-free bug in best-efforts recovery: column_family_memtables_ needs to point to a valid ColumnFamilySet.
+* Let best-efforts recovery ignore corrupted files during table loading.
+* Fix a corrupt key read from an ingested file when the iterator direction switches from reverse to forward at a key that is a prefix of another key in the same file. It is only possible in files with a non-zero global seqno.
+* Fix an abnormally large estimate from GetApproximateSizes when a range starts near the end of one SST file and near the beginning of another. Now GetApproximateSizes consistently and fairly includes the size of SST metadata in addition to data blocks, attributing metadata proportionally among the data blocks based on their size.
+* Fix potential file descriptor leakage in PosixEnv's IsDirectory() and NewRandomAccessFile().
+* Fix a false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch).
+* Fix sst_dump to return a non-zero exit code if the specified file is not a recognized SST file or fails requested checks.
+* Fix incorrect results from batched MultiGet for duplicate keys, when the duplicate key matches the largest key of an SST file and the value type for the key in the file is a merge value.
+
+### Public API Change
+* Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request.
+* BlobDB now explicitly disallows using the default column family's storage directories as the blob directory.
+* DeleteRange now returns `Status::InvalidArgument` if the range's end key comes before its start key according to the user comparator. Previously the behavior was undefined.
+* ldb now uses options.force_consistency_checks = true by default, and "--disable_consistency_checks" is added to disable it.
+* DB::OpenForReadOnly no longer creates files or directories if the named DB does not exist, unless create_if_missing is set to true.
+* The consistency checks that validate LSM state changes (table file additions/deletions during flushes and compactions) are now stricter, more efficient, and no longer optional, i.e. they are performed even if `force_consistency_checks` is `false`.
+* Disable delete-triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode with num_levels = 1, in order to avoid a corruption bug.
+* `pin_l0_filter_and_index_blocks_in_cache` no longer applies to L0 files larger than `1.5 * write_buffer_size`, to give more predictable memory usage. Such L0 files may exist due to intra-L0 compaction, external file ingestion, or the user dynamically changing `write_buffer_size` (note, however, that files that are already pinned will continue being pinned, even after such a dynamic change).
+* In point-in-time WAL recovery mode, fail database recovery in case of an IOError while reading the WAL, to avoid data loss.
+* A new method `Env::LowerThreadPoolCPUPriority(Priority, CpuPriority)` is added to `Env` to be able to lower to a specific priority such as `CpuPriority::kIdle`.
+
+### New Features
+* Added a new --readahead_size argument to sst_dump. Users can specify the read size when scanning the data. sst_dump also tries to prefetch the tail part of the SST files, so usually some I/Os are saved there too.
+* Generate a file checksum in SstFileWriter if Options.file_checksum_gen_factory is set. The checksum and checksum function name are stored in ExternalSstFileInfo after the SST file write is finished.
+* Add a value_size_soft_limit in read options which limits the cumulative value size of keys read in batches in MultiGet. Once the cumulative value size of found keys exceeds read_options.value_size_soft_limit, all the remaining keys are returned with an aborted status without further finding their values. By default the value_size_soft_limit is std::numeric_limits<uint64_t>::max() (a MultiGet sketch follows this section).
+* Enable SST file ingestion with file checksum information when calling IngestExternalFiles(const std::vector<IngestExternalFileArg>& args). Added files_checksums and files_checksum_func_names to IngestExternalFileArg such that users can ingest SST files with their file checksum information. Added verify_file_checksum to IngestExternalFileOptions (default is true). To be backward compatible, if the DB does not enable file checksums or the user does not provide checksum information (the vectors of files_checksums and files_checksum_func_names are both empty), verification of the file checksum is always successful. If the DB enables file checksums, the DB will always generate the checksum for each ingested SST file during the Prepare stage of ingestion and store the checksum in the Manifest, unless verify_file_checksum is false and checksum information is provided by the application. In that case, we only verify the checksum function name and directly store the ingested checksum in the Manifest. If verify_file_checksum is set to true, the DB will verify the ingested checksum and function name against the generated ones. Any mismatch will fail the ingestion. Note that, if IngestExternalFileOptions::write_global_seqno is true, the seqno will be changed in the ingested file, and therefore the checksum of the file will change as well. In this case, a new checksum will be generated after the seqno is updated and stored in the Manifest.
+
+### Performance Improvements
+* Eliminate redundant key comparisons during random access in block-based tables.
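+A minimal sketch (assumed usage; the limit is illustrative) of bounding the cumulative value bytes fetched by a batched MultiGet via `ReadOptions::value_size_soft_limit`:
+```cpp
+#include <string>
+#include <vector>
+#include <rocksdb/db.h>
+#include <rocksdb/options.h>
+
+// Once the cumulative size of found values exceeds the soft limit, the
+// remaining keys come back with an aborted status instead of values.
+std::vector<rocksdb::Status> BoundedMultiGet(
+    rocksdb::DB* db, const std::vector<rocksdb::Slice>& keys,
+    std::vector<std::string>* values) {
+  rocksdb::ReadOptions read_options;
+  read_options.value_size_soft_limit = 1 << 20;  // ~1 MiB, illustrative
+  return db->MultiGet(read_options, keys, values);
+}
+```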
+
+## 6.10 (2020-05-02)
+### Bug Fixes
+* Fix a wrong result being read from an ingested file. This may happen when a key in the file happens to be a prefix of another key in the same file, and can further cause more data corruption. The issue has existed in RocksDB >= 5.0.0 since DB::IngestExternalFile() was introduced.
+* Finish the implementation of BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey. It's now ready for use, and significantly reduces read amplification in some setups, especially for iterator seeks.
+* Fix a bug by updating the CURRENT file so that it points to the correct MANIFEST file after best-efforts recovery.
+* Fixed a bug where ColumnFamilyHandle objects were not cleaned up in case an error happened during BlobDB's open after the base DB had been opened.
+* Fix a potential undefined behavior caused by trying to dereference a nullable pointer (the timestamp argument) in DB::MultiGet.
+* Fix a bug caused by not including the user timestamp in MultiGet LookupKey construction. This can lead to wrong query results, since the trailing bytes of a user key, if not shorter than the timestamp, will be mistaken for the user timestamp.
+* Fix a bug caused by using the wrong compare function when sorting the input keys of MultiGet with timestamps.
+* Upgraded the version of the bzip library (1.0.6 -> 1.0.8) used with RocksJava to address potential vulnerabilities if an attacker can manipulate compressed data saved and loaded by RocksDB (not normal). See issue #6703.
+
+### Public API Change
+* Add a ConfigOptions argument to the APIs dealing with converting options to and from strings and files. ConfigOptions is meant to replace some of the options (such as input_strings_escaped and ignore_unknown_options) and allow for more parameters to be passed in the future without changing the function signature.
+* Add NewFileChecksumGenCrc32cFactory to the file checksum public API, such that the builtin Crc32c-based file checksum generator factory can be used by applications.
+* Add IsDirectory to Env and FS to indicate whether a path is a directory.
+
+### New Features
+* Added support for pipelined & parallel compression optimization for `BlockBasedTableBuilder`. This optimization makes block building, block compression, and block appending a pipeline, and uses multiple threads to accelerate block compression. Users can set `CompressionOptions::parallel_threads` greater than 1 to enable compression parallelism. This feature is experimental for now.
+* Provide an allocator for memkind to be used with block cache. This is to work with memory technologies (Intel DCPMM is one such technology currently available) that require different libraries for allocation and management (such as PMDK and memkind). The high capacities available make it possible to provision large caches (up to several TBs in size) beyond what is achievable with DRAM.
+* Option `max_background_flushes` can be set dynamically using DB::SetDBOptions().
+* Added functionality to the sst_dump tool to check the compressed file size for different compression levels and print the time spent on compressing files with each compression type. Added arguments `--compression_level_from` and `--compression_level_to` to report the sizes at all compression levels; one compression_type must be specified with them so that compressed sizes of that type are reported at different levels.
+* Added statistics for redundant insertions into block cache: rocksdb.block.cache.*add.redundant.
+(There is currently no coordination to ensure that only one thread loads a table block when many threads are trying to access that same table block.)
+
+### Bug Fixes
+* Fix a bug when making options.bottommost_compression, options.compression_opts, and options.bottommost_compression_opts dynamically changeable: the modified values were not written to option files or returned to users when queried.
+* Fix a bug where index key comparisons were unaccounted in `PerfContext::user_key_comparison_count` for lookups in files written with `format_version >= 3`.
+* Fix many bloom.filter statistics not being updated in batched MultiGet.
+
+### Performance Improvements
+* Improve performance of batched MultiGet with partitioned filters, by sharing block cache lookups to applicable filter blocks.
+* Reduced memory copies when fetching and uncompressing compressed blocks from SST files.
+
+## 6.9.0 (2020-03-29)
+### Behavior changes
+* Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while other keys have not. This fix reverts that and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly.
+
+### Public API Change
+* Fix spelling so that the API now has the correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias.
+* Updated the default format_version in BlockBasedTableOptions from 2 to 4. SST files generated with the new default can be read by RocksDB versions 5.16 and newer, and use more efficient encoding of keys in index blocks.
+* A new parameter `CreateBackupOptions` is added to both `BackupEngine::CreateNewBackup` and `BackupEngine::CreateNewBackupWithMetadata`; you can decrease the CPU priority of `BackupEngine`'s background threads by setting `decrease_background_thread_cpu_priority` and `background_thread_cpu_priority` in `CreateBackupOptions`.
+* Updated the public API of SST file checksums. Introduce the FileChecksumGenFactory to create a FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Finalize should only be called once, after all data is processed, to generate the final checksum. Temporary data should be maintained by the FileChecksumGenerator object itself until it can finally return the checksum string.
+
+### Bug Fixes
+* Fix a bug where range tombstone blocks in ingested files were cached incorrectly during ingestion. If range tombstones were read from those incorrectly cached blocks, the keys they covered would be exposed.
+* Fix a data race that might cause a crash, with a small chance, when calling DB::GetCreationTimeOfOldestFile(). The bug was introduced in the 6.6 release.
+* Fix a bug where the boolean value of optimize_filters_for_hits was mistakenly used as the max-threads argument when loading table handles after a flush or compaction; the correct value is 1. The bug should not cause user-visible problems.
+* Fix a bug which might crash the service when the write buffer manager fails to insert the dummy handle into the block cache.
+
+### Performance Improvements
+* In CompactRange, for levels starting from 0, if the level does not have any file with any key falling in the specified range, the level is skipped.
+So instead of always compacting from level 0, the compaction starts from the first level with keys in the specified range and continues until the last such level.
+* Reduced memory copies when reading the SST footer and in BlobDB in direct IO mode.
+* When restarting a database with a large number of SST files, a large amount of CPU time was spent getting the logical block size of the SST files, which slowed down startup; this inefficiency is optimized away with an internal cache of the logical block sizes.
+
+### New Features
+* Basic support for user timestamps in iterators. Seek/SeekToFirst/Next and lower/upper bounds are supported. Reverse iteration is not supported. Merge is not considered.
+* On file lock failure, when the lock is held by the current process, return the acquisition time and thread ID in the error message.
+* Added a new option, best_efforts_recovery (default: false), to allow the database to open in a db dir with missing table files. During best-efforts recovery, missing table files are ignored, and the database recovers to the most recent state achievable without them. Cross-column-family consistency is not guaranteed even if WAL is enabled.
+* options.bottommost_compression, options.compression_opts, and options.bottommost_compression_opts are now dynamically changeable.
-## 6.8.0 (02/24/2020)
+## 6.8.0 (2020-02-24)
 ### Java API Changes
 * Major breaking changes to Java comparators, toward standardizing on ByteBuffer for performant, locale-neutral operations on keys (#6252).
 * Added overloads of common API methods using direct ByteBuffers for keys and values (#2283).
@@ -30,7 +728,7 @@
 * `db_bench` now supports `value_size_distribution_type`, `value_size_min`, `value_size_max` options for generating random variable sized value. Added `blob_db_compression_type` option for BlobDB to enable blob compression.
 * Replace RocksDB namespace "rocksdb" with flag "ROCKSDB_NAMESPACE" which, if not defined, is defined as "rocksdb" in the header file rocksdb_namespace.h.
-## 6.7.0 (01/21/2020)
+## 6.7.0 (2020-01-21)
 ### Public API Change
 * Added a rocksdb::FileSystem class in include/rocksdb/file_system.h to encapsulate file creation/read/write operations, and an option DBOptions::file_system to allow a user to pass in an instance of rocksdb::FileSystem. If it is a non-null value, this will take precedence over DBOptions::env for file operations. A new API rocksdb::FileSystem::Default() returns a platform default object. The DBOptions::env option and Env::Default() API will continue to be used for threading and other OS related functions, and where DBOptions::file_system is not specified, for file operations. For storage developers who are accustomed to rocksdb::Env, the interface in rocksdb::FileSystem is new and will probably undergo some changes as more storage systems are ported to it from rocksdb::Env. As of now, no env other than Posix has been ported to the new interface.
 * A new rocksdb::NewSstFileManager() API that allows the caller to pass in separate Env and FileSystem objects.
@@ -55,11 +753,11 @@
 * Introduce ReadOptions.auto_prefix_mode. When set to true, the iterator will return the same result as a total order seek, but may choose to use prefix seek internally based on the seek key and iterator upper bound.
 * MultiGet() can use IO Uring to parallelize reads from the same SST file. This feature is disabled by default. It can be enabled with the environment variable ROCKSDB_USE_IO_URING.
-## 6.6.2 (01/13/2020)
+## 6.6.2 (2020-01-13)
 ### Bug Fixes
 * Fixed a bug where non-L0 compaction input files were not considered to compute the `creation_time` of new compaction outputs.
-## 6.6.1 (01/02/2020)
+## 6.6.1 (2020-01-02)
 ### Bug Fixes
 * Fix a bug in WriteBatchWithIndex::MultiGetFromBatchAndDB, which is called by Transaction::MultiGet, that causes failures due to stale pointer access when the number of keys is > 32
 * Fixed two performance issues related to memtable history trimming. First, a new SuperVersion is now created only if some memtables were actually trimmed. Second, trimming is only scheduled if there is at least one flushed memtable that is kept in memory for the purposes of transaction conflict checking.
@@ -69,7 +767,7 @@
 * Delete superversions in BackgroundCallPurge.
 * Fix use-after-free and double-deleting files in BackgroundCallPurge().
-## 6.6.0 (11/25/2019)
+## 6.6.0 (2019-11-25)
 ### Bug Fixes
 * Fix data corruption caused by the output of intra-L0 compaction on an ingested file not being placed in the correct order in L0.
 * Fix a data race between Version::GetColumnFamilyMetaData() and Compaction::MarkFilesBeingCompacted() for access to being_compacted (#6056). The current fix acquires the db mutex during Version::GetColumnFamilyMetaData(), which may cause regression.
@@ -122,19 +820,19 @@
 * For 64-bit hashing, RocksDB is standardizing on a slightly modified preview version of XXH3. This function is now used for many non-persisted hashes, along with fastrange64() in place of the modulus operator, and some benchmarks show a slight improvement.
 * Level iterator to invalidate the iterator more often in prefix seek and when the level is filtered out by the prefix bloom filter.
-## 6.5.2 (11/15/2019)
+## 6.5.2 (2019-11-15)
 ### Bug Fixes
 * Fix an assertion failure in MultiGet() when BlockBasedTableOptions::no_block_cache is true and there is no compressed block cache
 * Fix a buffer overrun problem in BlockBasedTable::MultiGet() when compression is enabled and no compressed block cache is configured.
 * If a call to BackupEngine::PurgeOldBackups or BackupEngine::DeleteBackup suffered a crash, power failure, or I/O error, files could be left over from old backups that could only be purged with a call to GarbageCollect. Any call to PurgeOldBackups, DeleteBackup, or GarbageCollect should now suffice to purge such files.
-## 6.5.1 (10/16/2019)
+## 6.5.1 (2019-10-16)
 ### Bug Fixes
 * Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause strange results when reseek happens with a different iterator upper bound.
 * Fix a bug in BlockBasedTableIterator that might return incorrect results when reseek happens with a different iterator upper bound.
 * Fix a bug when partitioned filters and prefix search are used in conjunction: ::SeekForPrev could return invalid for an existing prefix. ::SeekForPrev might be called by the user, or internally on ::Prev, or within ::Seek if the return value involves a Delete or a Merge operand.
-## 6.5.0 (9/13/2019)
+## 6.5.0 (2019-09-13)
 ### Bug Fixes
 * Fixed a number of data races in BlobDB.
 * Fix a bug where the compaction snapshot refresh feature is not disabled as advertised when `snap_refresh_nanos` is set to 0.
@@ -155,7 +853,7 @@
 ### Performance Improvements
 * Improve the speed of the MemTable Bloom filter, reducing the write overhead of enabling it by 1/3 to 1/2, with similar benefit to read performance.
-## 6.4.0 (7/30/2019)
+## 6.4.0 (2019-07-30)
 ### Default Option Change
 * LRUCacheOptions.high_pri_pool_ratio is set to 0.5 (previously 0.0) by default, which means that by default midpoint insertion is enabled. The same change is made for the default value of the high_pri_pool_ratio argument in NewLRUCache(). When the block cache is not explicitly created, the small block cache created by BlockBasedTable will still have this option set to 0.0.
 * Change BlockBasedTableOptions.cache_index_and_filter_blocks_with_high_priority's default value from false to true.
@@ -191,7 +889,7 @@
 * Fixed a regression where the fill_cache read option also affected index blocks.
 * Fixed an issue where using cache_index_and_filter_blocks==false affected partitions of partitioned indexes/filters as well.
-## 6.3.2 (8/15/2019)
+## 6.3.2 (2019-08-15)
 ### Public API Change
 * The semantics of the per-block-type block read counts in the performance context now match those of the generic block_read_count.
@@ -199,11 +897,11 @@
 * Fixed a regression where the fill_cache read option also affected index blocks.
 * Fixed an issue where using cache_index_and_filter_blocks==false affected partitions of partitioned indexes as well.
-## 6.3.1 (7/24/2019)
+## 6.3.1 (2019-07-24)
 ### Bug Fixes
 * Fix an auto-rolling bug introduced in 6.3.0, which causes a segfault if log file creation fails.
-## 6.3.0 (6/18/2019)
+## 6.3.0 (2019-06-18)
 ### Public API Change
 * Now DB::Close() will return an Aborted() error when there is an unreleased snapshot. Users can retry after all snapshots are released.
 * Index blocks are now handled similarly to data blocks with regards to the block cache: instead of storing objects in the cache, only the blocks themselves are cached. In addition, index blocks no longer get evicted from the cache when a table is closed, can now use the compressed block cache (if any), and can be shared among multiple table readers.
@@ -240,7 +938,7 @@
 * Fix a bug caused by the secondary not skipping the beginning of a new MANIFEST.
 * On DB open, delete WAL trash files left behind in wal_dir
-## 6.2.0 (4/30/2019)
+## 6.2.0 (2019-04-30)
 ### New Features
 * Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`.
 * Improve range scan performance by avoiding a per-key upper bound check in BlockBasedTableIterator.
@@ -262,7 +960,7 @@
 * Close a WAL file before another thread deletes it.
 * Fix an assertion failure `IsFlushPending() == true` caused by one bg thread releasing the db mutex in ~ColumnFamilyData and another thread clearing the `flush_requested_` flag.
-## 6.1.1 (4/9/2019)
+## 6.1.1 (2019-04-09)
 ### New Features
 * When reading from an option file/string/map, customized comparators and/or merge operators can be filled according to the object registry.
@@ -272,7 +970,7 @@
 * Fix a bug in 2PC where a sequence of txn prepare, memtable flush, and crash could result in losing the prepared transaction.
 * Fix a bug in Encryption Env which could cause encrypted files to be read beyond file boundaries.
-## 6.1.0 (3/27/2019)
+## 6.1.0 (2019-03-27)
 ### New Features
 * Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers.
 * Added a feature to perform data-block sampling for compressibility, and report stats to the user.
@@ -290,7 +988,7 @@
 * Fix JEMALLOC_CXX_THROW macro missing from older Jemalloc versions, causing build failures on some platforms.
 * Fix SstFileReader not being able to open a file ingested with write_global_seqno=true.
-## 6.0.0 (2/19/2019)
+## 6.0.0 (2019-02-19)
 ### New Features
 * Enabled checkpoint on readonly db (DBImplReadOnly).
 * Make DB ignore dropped column families while committing results of atomic flush.
@@ -332,7 +1030,7 @@
 ### Change Default Options
 * Change options.compaction_pri's default to kMinOverlappingRatio
-## 5.18.0 (11/30/2018)
+## 5.18.0 (2018-11-30)
 ### New Features
 * Introduced the `JemallocNodumpAllocator` memory allocator. When in use, the block cache will be excluded from core dumps.
 * Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with the `thread_local` keyword for perf_context. Added per-level perf context for bloom filter and `Get` query.
@@ -360,7 +1058,7 @@
 * Start populating the `NO_FILE_CLOSES` ticker statistic, which was always zero previously.
 * The default value of NewBloomFilterPolicy()'s argument use_block_based_builder is changed to false. Note that this new default may cause large temp memory usage when building very large SST files.
-## 5.17.0 (10/05/2018)
+## 5.17.0 (2018-10-05)
 ### Public API Change
 * `OnTableFileCreated` will now be called for empty files generated during compaction. In that case, `TableFileCreationInfo::file_path` will be "(nil)" and `TableFileCreationInfo::file_size` will be zero.
 * Add `FlushOptions::allow_write_stall`, which controls whether Flush calls start working immediately, even if it causes user writes to stall, or will wait until flush can be performed without causing write stall (similar to `CompactRangeOptions::allow_write_stall`). Note that the default value is false, meaning we add delay to Flush calls until stalling can be avoided when possible. This is a behavior change compared to previous RocksDB versions, where Flush calls didn't check whether they might cause a stall or not.
@@ -374,21 +1072,21 @@
 * Avoid creating empty SSTs and subsequently deleting them in certain cases during compaction.
 * Sync CURRENT file contents during checkpoint.
-## 5.16.3 (10/1/2018)
+## 5.16.3 (2018-10-01)
 ### Bug Fixes
 * Fix a crash caused when `CompactFiles` is run with `CompactionOptions::compression == CompressionType::kDisableCompressionOption`. Now that setting causes the compression type to be chosen according to the column family-wide compression options.
-## 5.16.2 (9/21/2018)
+## 5.16.2 (2018-09-21)
 ### Bug Fixes
 * Fix a bug in partition filters with format_version=4.
-## 5.16.1 (9/17/2018)
+## 5.16.1 (2018-09-17)
 ### Bug Fixes
 * Remove trace_analyzer_tool from the rocksdb_lib target in the TARGETS file.
 * Fix RocksDB Java build and tests.
 * Remove sync point in Block destructor.
-## 5.16.0 (8/21/2018)
+## 5.16.0 (2018-08-21)
 ### Public API Change
 * The merge operands are passed to `MergeOperator::ShouldMerge` in the reversed order relative to how they were merged (passed to FullMerge or FullMergeV2) for performance reasons
 * GetAllKeyVersions() now takes an extra argument, `max_num_ikeys`.
@@ -402,7 +1100,7 @@
 ### Bug Fixes
 * Fix a bug in misreporting the estimated partition index size in the properties block.
-## 5.15.0 (7/17/2018)
+## 5.15.0 (2018-07-17)
 ### Public API Change
 * Remove managed iterator. ReadOptions.managed is no longer effective.
 * For bottommost_compression, a compatible CompressionOptions is added via `bottommost_compression_opts`. To keep backward compatibility, a new boolean `enabled` is added to CompressionOptions.
 For compression_opts, it will always be used regardless of the value of `enabled`. For bottommost_compression_opts, it will only be used when the user sets `enabled=true`; otherwise, compression_opts is used for bottommost_compression by default.
@@ -428,7 +1126,7 @@
 * Fix a bug caused by not copying the block trailer with compressed SST file, direct IO, prefetcher and no compressed block cache.
 * Fix writes getting stuck indefinitely if enable_pipelined_write=true. The issue has existed since pipelined write was introduced in 5.5.0.
-## 5.14.0 (5/16/2018)
+## 5.14.0 (2018-05-16)
 ### Public API Change
 * Add a BlockBasedTableOption to align uncompressed data blocks on the smaller of block size or page size boundary, to reduce flash reads by avoiding reads spanning 4K pages.
 * The background thread naming convention changed (on supporting platforms) to "rocksdb:" followed by the thread pool priority and thread number, e.g., "rocksdb:low0".
@@ -461,7 +1159,7 @@
 * Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances.
 * Added SstFileManager to the Java API to allow managing SST files across DB instances.
-## 5.13.0 (3/20/2018)
+## 5.13.0 (2018-03-20)
 ### Public API Change
 * RocksDBOptionsParser::Parse()'s `ignore_unknown_options` argument will only be effective if the option file shows it is generated using a higher version of RocksDB than the current version.
 * Remove CompactionEventListener.
@@ -477,7 +1175,7 @@
 * Fix a leak in prepared_section_completed_ where the zeroed entries would not be removed from the map.
 * Fix WAL corruption caused by a race condition between a user write thread and the backup/checkpoint thread.
-## 5.12.0 (2/14/2018)
+## 5.12.0 (2018-02-14)
 ### Public API Change
 * Iterator::SeekForPrev is now a pure virtual method. This is to prevent users who implement the Iterator interface from failing to implement SeekForPrev by mistake.
 * Add `include_end` option to make the range end exclusive when `include_end == false` in `DeleteFilesInRange()`.
@@ -499,7 +1197,7 @@
 * Fix advance reservation of arena block addresses.
 * Fix handling of empty string as checkpoint directory.
-## 5.11.0 (01/08/2018)
+## 5.11.0 (2018-01-08)
 ### Public API Change
 * Add `autoTune` and `getBytesPerSecond()` to RocksJava RateLimiter
@@ -516,7 +1214,7 @@
 * Fix a mislabel bug for bottom-pri compaction threads.
 * Fix DB::Flush() keeping waiting after flush finishes under certain conditions.
-## 5.10.0 (12/11/2017)
+## 5.10.0 (2017-12-11)
 ### Public API Change
 * When running `make` with environment variable `USE_SSE` set and `PORTABLE` unset, will use all machine features available locally. Previously this combination only compiled SSE-related features.
@@ -531,7 +1229,7 @@
 * Fix a performance issue in `IngestExternalFile()` affecting databases with a large number of SST files.
 * Fix possible corruption to LSM structure when `DeleteFilesInRange()` deletes a subset of files spanned by a `DeleteRange()` marker.
-## 5.9.0 (11/1/2017)
+## 5.9.0 (2017-11-01)
 ### Public API Change
 * `BackupableDBOptions::max_valid_backups_to_open == 0` now means no backups will be opened during BackupEngine initialization. Previously this condition disabled limiting backups opened.
 * `DBOptions::preserve_deletes` is a new option that allows one to specify that DB should not drop tombstones for regular deletes if they have a sequence number larger than what was set by the new API call `DB::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum)`. Disabled by default.
@@ -558,7 +1256,7 @@
 * Fix a potential data inconsistency issue during point-in-time recovery.
 `DB::Open()` will abort if column family inconsistency is found during PIT recovery.
 * Fix possible metadata corruption in databases using `DeleteRange()`.
-## 5.8.0 (08/30/2017)
+## 5.8.0 (2017-08-30)
 ### Public API Change
 * Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints.
 * `Slice::compare` and BytewiseComparator `Compare` no longer accept `Slice`s containing nullptr.
@@ -578,7 +1276,7 @@
 * Fix transient reappearance of keys covered by range deletions when the memtable prefix bloom filter is enabled.
 * Fix a potentially wrong file smallest key when range deletions separated by a snapshot are written together.
-## 5.7.0 (07/13/2017)
+## 5.7.0 (2017-07-13)
 ### Public API Change
 * DB property "rocksdb.sstables" now prints keys in hex form.
@@ -593,7 +1291,7 @@
 ### Bug Fixes
 * Fix discarding empty compaction output files when `DeleteRange()` is used together with subcompactions.
-## 5.6.0 (06/06/2017)
+## 5.6.0 (2017-06-06)
 ### Public API Change
 * Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads.
 * Replace `Options::max_background_flushes`, `Options::max_background_compactions`, and `Options::base_background_compactions` all with `Options::max_background_jobs`, which automatically decides how many threads to allocate towards flush/compaction.
@@ -610,7 +1308,7 @@
 ### Bug Fixes
 * No longer ignore the return value of fsync() in flush.
-## 5.5.0 (05/17/2017)
+## 5.5.0 (2017-05-17)
 ### New Features
 * FIFO compaction now supports Intra-L0 compaction too, with CompactionOptionsFIFO.allow_compaction=true.
 * DB::ResetStats() to reset internal stats.
@@ -627,7 +1325,7 @@
 ### Bug Fixes
 * Fix the bug that Direct I/O uses direct reads for non-SST files
-## 5.4.0 (04/11/2017)
+## 5.4.0 (2017-04-11)
 ### Public API Change
 * random_access_max_buffer_size no longer has any effect
 * Removed Env::EnableReadAhead(), Env::ShouldForwardRawRequest()
@@ -644,7 +1342,7 @@
 * Introduce level-based L0->L0 compactions to reduce file count, so write delays are incurred less often.
 * (Experimental) Partitioning filters which creates an index on the partitions. The feature can be enabled by setting partition_filters when using kFullFilter. Currently the feature also requires two-level indexing to be enabled. The number of partitions is the same as the number of partitions for indexes, which is controlled by metadata_block_size.
-## 5.3.0 (03/08/2017)
+## 5.3.0 (2017-03-08)
 ### Public API Change
 * Remove disableDataSync option.
 * Remove timeout_hint_us option from WriteOptions. The option has been deprecated and has had no effect since 3.13.0.
@@ -654,7 +1352,7 @@
 ### Bug Fixes
 * Fix the bug that the iterator may skip keys
-## 5.2.0 (02/08/2017)
+## 5.2.0 (2017-02-08)
 ### Public API Change
 * NewLRUCache() will determine the number of shard bits automatically based on capacity, if the user doesn't pass one. This also impacts the default block cache when the user doesn't explicitly provide one.
 * Change the default of the delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
@@ -672,7 +1370,7 @@
 * Some fixes related to 2PC.
 * Fix bugs of data corruption in direct I/O
-## 5.1.0 (01/13/2017)
+## 5.1.0 (2017-01-13)
 * Support dynamically changing the `delete_obsolete_files_period_micros` option via SetDBOptions().
 * Added EventListener::OnExternalFileIngested, which will be called when IngestExternalFile() adds a file successfully.
 * BackupEngine::Open and BackupEngineReadOnly::Open now always return error statuses matching those of the backup Env.
@@ -681,7 +1379,7 @@
 * Fix the bug that, if 2PC is enabled, checkpoints may lose some recent transactions.
 * When file copying is needed when creating checkpoints or bulk loading files, fsync the file after the file copying.
-## 5.0.0 (11/17/2016)
+## 5.0.0 (2016-11-17)
 ### Public API Change
 * Options::max_bytes_for_level_multiplier is now a double along with all getters and setters.
 * Support dynamically changing the `delayed_write_rate` and `max_total_wal_size` options via SetDBOptions().
@@ -700,7 +1398,7 @@
 * Add LuaCompactionFilter in utilities. This allows developers to write compaction filters in Lua. To use this feature, LUA_PATH needs to be set to the root directory of Lua.
 * No longer populate the "LATEST_BACKUP" file in the backup directory, which formerly contained the number of the latest backup. The latest backup can be determined by finding the highest numbered file in the "meta/" subdirectory.
-## 4.13.0 (10/18/2016)
+## 4.13.0 (2016-10-18)
 ### Public API Change
 * DB::GetOptions() reflects dynamically changed options (i.e. through DB::SetOptions()) and returns a copy of the options instead of a reference.
 * Added Statistics::getAndResetTickerCount().
@@ -709,7 +1407,7 @@
 * Add DB::SetDBOptions() to dynamically change base_background_compactions and max_background_compactions.
 * Added Iterator::SeekForPrev(). This new API will seek to the last key that is less than or equal to the target key.
-## 4.12.0 (9/12/2016)
+## 4.12.0 (2016-09-12)
 ### Public API Change
 * CancelAllBackgroundWork() flushes all memtables for databases containing writes that have bypassed the WAL (writes issued with WriteOptions::disableWAL=true) before shutting down background threads.
 * Merge the options source_compaction_factor, max_grandparent_overlap_bytes and expanded_compaction_factor into max_compaction_bytes.
@@ -721,7 +1419,7 @@
 * Change ticker/histogram statistics implementations to accumulate data in thread-local storage, which improves CPU performance by reducing cache coherency costs. Callers of CreateDBStatistics do not need to change anything to use this feature.
 * Block cache mid-point insertion, where index and filter blocks are inserted into the LRU block cache with higher priority. The feature can be enabled by setting BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority to true and high_pri_pool_ratio > 0 when creating NewLRUCache.
-## 4.11.0 (8/1/2016)
+## 4.11.0 (2016-08-01)
 ### Public API Change
 * options.memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge pages for the memtable too, rather than just the memtable bloom filter.
@@ -729,7 +1427,7 @@
 * A tool to migrate a DB after options change. See include/rocksdb/utilities/option_change_migration.h.
 * Add ReadOptions.background_purge_on_iterator_cleanup. If true, we avoid file deletion when destroying iterators.
-## 4.10.0 (7/5/2016)
+## 4.10.0 (2016-07-05)
 ### Public API Change
 * options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecates options.memtable_prefix_bloom_probes
 * enum type CompressionType and PerfLevel change from char to unsigned char. The value of all PerfLevels shifts by one.
@@ -741,7 +1439,7 @@
 * RepairDB support for column families.
RepairDB now associates data with non-default column families using information embedded in the SST/WAL files (4.7 or later). For data written by 4.6 or earlier, RepairDB associates it with the default column family.
* Add options.write_buffer_manager which allows users to control total memtable sizes across multiple DB instances.
-## 4.9.0 (6/9/2016)
+## 4.9.0 (2016-06-09)
### Public API changes
* Add the bottommost_compression option. This option can be used to set a specific compression algorithm for the bottommost level (the last level containing files in the DB).
* Introduce CompactionJobInfo::compression. This field states the compression algorithm used to generate the output files of the compaction.
@@ -751,7 +1449,7 @@
### New Features
* Introduce NewSimCache() in rocksdb/utilities/sim_cache.h. This function creates a block cache that is able to give simulation results (mainly hit rate) of simulating block behavior with a configurable cache size.
-## 4.8.0 (5/2/2016)
+## 4.8.0 (2016-05-02)
### Public API Change
* Allow a preset compression dictionary for improved compression of block-based tables. This is supported for zlib, zstd, and lz4. The compression dictionary's size is configurable via CompressionOptions::max_dict_bytes.
* Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F
@@ -761,12 +1459,12 @@
### New Features
* Add ReadOptions::readahead_size. If non-zero, NewIterator will create a new table reader which performs reads of the given size.
-## 4.7.0 (4/8/2016)
+## 4.7.0 (2016-04-08)
### Public API Change
* Rename option compaction_measure_io_stats to report_bg_io_stats and include flush too.
* Change some default options. Now default options will optimize for server workloads. Also enable slowdown and full stop triggers for pending compaction bytes. These changes may cause sub-optimal performance or a significant increase of resource usage. To avoid these risks, users can open an existing RocksDB with options extracted from RocksDB option files. See https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File for how to use RocksDB option files. Or you can call Options.OldDefaults() to recover old defaults. DEFAULT_OPTIONS_HISTORY.md will track the change history of default options.
-## 4.6.0 (3/10/2016)
+## 4.6.0 (2016-03-10)
### Public API Changes
* Change default of BlockBasedTableOptions.format_version to 2. This means a DB created with default options by 4.6 or later cannot be opened by RocksDB version 3.9 or earlier.
* Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, inserts to the cache will fail if not enough capacity can be freed. Signature of Cache::Insert() is updated accordingly.
@@ -777,7 +1475,7 @@
* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification.
* Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned"
-## 4.5.0 (2/5/2016)
+## 4.5.0 (2016-02-05)
### Public API Changes
* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes.
* Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll.
@@ -788,7 +1486,7 @@
* Add kPersistedTier to ReadTier.
This option allows Get and MultiGet to read only the persisted data and skip mem-tables if writes were done with disableWAL = true.
* Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create an SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate.
-## 4.4.0 (1/14/2016)
+## 4.4.0 (2016-01-14)
### Public API Changes
* Change names in CompactionPri and add a new one.
* Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit.
@@ -798,7 +1496,7 @@
* Increase default options.delayed_write_rate to 2MB/s.
* Added a new parameter --path to ldb tool. --path accepts the name of either MANIFEST, SST or a WAL file. Either --db or --path can be used when calling ldb.
-## 4.3.0 (12/8/2015)
+## 4.3.0 (2015-12-08)
### New Features
* CompactionFilter has a new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key.
* RocksDB will now persist options under the same directory as the RocksDB database on successful DB::Open, CreateColumnFamily, DropColumnFamily, and SetOptions.
@@ -808,7 +1506,7 @@
### Public API Changes
* When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families.
-## 4.2.0 (11/9/2015)
+## 4.2.0 (2015-11-09)
### New Features
* Introduce CreateLoggerFromOptions(); this function creates a Logger for the provided DBOptions.
* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families.
@@ -821,7 +1519,7 @@
* Remove DefaultCompactionFilterFactory.
-## 4.1.0 (10/8/2015)
+## 4.1.0 (2015-10-08)
### New Features
* Added single delete operation as a more efficient way to delete keys that have not been overwritten.
* Added experimental AddFile() to the DB interface that allows users to add files created by SstFileWriter into an empty database; see include/rocksdb/sst_file_writer.h and DB::AddFile() for more info.
@@ -835,7 +1533,7 @@
* CompactionFilter has a new method FilterMergeOperand() that RocksDB applies to every merge operand during compaction to decide whether to filter the operand.
* We removed CompactionFilterV2 interfaces from include/rocksdb/compaction_filter.h. The functionality was deprecated already in version 3.13.
-## 4.0.0 (9/9/2015)
+## 4.0.0 (2015-09-09)
### New Features
* Added support for transactions. See include/rocksdb/utilities/transaction.h for more info.
* DB::GetProperty() now accepts "rocksdb.aggregated-table-properties" and "rocksdb.aggregated-table-properties-at-levelN", in which case it returns aggregated table properties of the target column family, or the aggregated table properties of the specified level N if the "at-level" version is used.
@@ -848,7 +1546,7 @@
* Added Equal() method to the Comparator interface that can optionally be overwritten in cases where equality comparisons can be done more efficiently than three-way comparisons.
* Previous 'experimental' OptimisticTransaction class has been replaced by Transaction class.
-## 3.13.0 (8/6/2015)
+## 3.13.0 (2015-08-06)
### New Features
* RollbackToSavePoint() in WriteBatch/WriteBatchWithIndex
* Add NewCompactOnDeletionCollectorFactory() in utilities/table_properties_collectors, which allows rocksdb to mark an SST file as need-compaction when it observes at least D deletion entries in any N consecutive entries in that SST file.
Note that this feature depends on an experimental NeedCompact() API --- the result of this API will not persist after DB restart.
@@ -863,7 +1561,7 @@
* Add statistics::getHistogramString() to print detailed distribution of a histogram metric.
* Add DBOptions::skip_stats_update_on_db_open. When it is on, DB::Open() will run faster as it skips the random reads required for loading necessary stats from SST files to optimize compaction.
-## 3.12.0 (7/2/2015)
+## 3.12.0 (2015-07-02)
### New Features
* Added experimental support for optimistic transactions. See include/rocksdb/utilities/optimistic_transaction.h for more info.
* Added a new way to report QPS from db_bench (check out --report_file and --report_interval_seconds)
@@ -893,7 +1591,7 @@
* Add BackupEngineImpl.options_.max_background_operations to specify the maximum number of operations that may be performed in parallel. Add support for parallelized backup and restore.
* Add DB::SyncWAL() that does a WAL sync without blocking writers.
-## 3.11.0 (5/19/2015)
+## 3.11.0 (2015-05-19)
### New Features
* Added a new API Cache::SetCapacity(size_t capacity) to dynamically change the maximum configured capacity of the cache. If the new capacity is less than the existing cache usage, the implementation will try to lower the usage by evicting the necessary number of elements following a strict LRU policy.
* Added an experimental API for handling flashcache devices (blacklists background threads from caching their reads) -- NewFlashcacheAwareEnv
@@ -904,7 +1602,7 @@
* TablePropertiesCollector::AddUserKey() is added to replace TablePropertiesCollector::Add(). AddUserKey() exposes the key type, sequence number, and file size so far to users.
* DBOptions::bytes_per_sync used to apply to both WAL and table files. As of 3.11 it applies only to table files. If you want to use this option to sync WAL in the background, please use wal_bytes_per_sync.
-## 3.10.0 (3/24/2015)
+## 3.10.0 (2015-03-24)
### New Features
* GetThreadStatus() is now able to report detailed thread status, including:
 - Thread Operation including flush and compaction.
@@ -939,7 +1637,7 @@
* lz4 compression is now included in the rocksjava static library when running `make rocksdbjavastatic`.
* Overflowing a size_t when setting rocksdb options now throws an IllegalArgumentException, which removes the necessity for a developer to catch these Exceptions explicitly.
-## 3.9.0 (12/8/2014)
+## 3.9.0 (2014-12-08)
### New Features
* Add rocksdb::GetThreadList(), which in the future will return the current status of all
@@ -958,7 +1656,7 @@
### Improvements
* The RocksDBLite library is now smaller and will be compiled with the -fno-exceptions flag.
-## 3.8.0 (11/14/2014)
+## 3.8.0 (2014-11-14)
### Public API changes
* BackupEngine::NewBackupEngine() was deprecated; please use BackupEngine::Open() from now on.
@@ -972,14 +1670,14 @@
* CompactFiles and EventListener, although they are still in experimental state
* Full ColumnFamily support in RocksJava.
-## 3.7.0 (11/6/2014)
+## 3.7.0 (2014-11-06)
### Public API changes
* Introduce SetOptions() API to allow adjusting a subset of options dynamically online
* Introduce 4 new convenient functions for converting Options from string: GetColumnFamilyOptionsFromMap(), GetColumnFamilyOptionsFromString(), GetDBOptionsFromMap(), GetDBOptionsFromString()
* Remove WriteBatchWithIndex.Delete() overloads using SliceParts
* When opening a DB, if options.max_background_compactions is larger than the existing low pri pool of options.env, it will enlarge it.
Similarly, if options.max_background_flushes is larger than the existing high pri pool of options.env, it will enlarge it.
-## 3.6.0 (10/7/2014)
+## 3.6.0 (2014-10-07)
### Disk format changes
* If you're using RocksDB on ARM platforms and you're using the default bloom filter, there is a disk format change you need to be aware of. There are three steps you need to do when you convert to the new release: 1. turn off filter policy, 2. compact the whole database, 3. turn on filter policy
@@ -992,7 +1690,7 @@
* Change target_file_size_base type to uint64_t from int.
* Remove allow_thread_local. This feature was proved to be stable, so we are turning it always-on.
-## 3.5.0 (9/3/2014)
+## 3.5.0 (2014-09-03)
### New Features
* Add include/utilities/write_batch_with_index.h, providing a utility class to query data out of WriteBatch when building it.
* Move BlockBasedTable related options to BlockBasedTableOptions from Options. Change corresponding JNI interface. Options affected include:
@@ -1003,7 +1701,7 @@
### Public API changes
* The Prefix Extractor used with V2 compaction filters is now passed the user key to SliceTransform::Transform instead of the unparsed RocksDB key.
-## 3.4.0 (8/18/2014)
+## 3.4.0 (2014-08-18)
### New Features
* Support multiple DB paths in universal style compactions
* Add feature of storing plain table index and bloom filter in the SST file.
@@ -1019,7 +1717,7 @@
* Add DB::GetIntProperty(), which returns DB properties that are integers as uint64_t.
* The Prefix Extractor used with V2 compaction filters is now passed the user key to SliceTransform::Transform instead of the unparsed RocksDB key.
-## 3.3.0 (7/10/2014)
+## 3.3.0 (2014-07-10)
### New Features
* Added JSON API prototype.
* HashLinklist reduces the performance outlier caused by a skewed bucket by switching data in the bucket from a linked list to a skip list. Add parameter threshold_use_skiplist in NewHashLinkListRepFactory().
@@ -1030,7 +1728,7 @@
### Public API changes
* Removed NewTotalOrderPlainTableFactory because it is not used and was implemented semantically incorrectly.
-## 3.2.0 (06/20/2014)
+## 3.2.0 (2014-06-20)
### Public API changes
* We removed seek compaction as a concept from RocksDB because:
@@ -1048,7 +1746,7 @@
### Performance Improvements
* Tailing Iterator re-implemented with ForwardIterator + Cascading Search Hint, with ~20% throughput improvement.
-## 3.1.0 (05/21/2014)
+## 3.1.0 (2014-05-21)
### Public API changes
* Replaced ColumnFamilyOptions::table_properties_collectors with ColumnFamilyOptions::table_properties_collector_factories
@@ -1057,7 +1755,7 @@
* Hash index for block-based table will be materialized and reconstructed more efficiently. Previously the hash index was constructed by scanning the whole table during every table open.
* FIFO compaction style (see the sketch below)
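For context on the FIFO compaction entry above: under this style all SST files stay in level 0, and once their total size exceeds a configured cap the oldest files are simply deleted. A minimal configuration sketch follows; the path and the 1 GB cap are arbitrary values chosen purely for illustration:

```cpp
#include <cassert>
#include "rocksdb/db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // FIFO compaction keeps every SST file in L0 and deletes the oldest
  // files once the total size exceeds max_table_files_size.
  options.compaction_style = rocksdb::kCompactionStyleFIFO;
  options.compaction_options_fifo.max_table_files_size =
      1024ULL * 1024 * 1024;  // 1 GB cap, arbitrary for this sketch

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/fifo_example", &db);
  assert(s.ok());
  delete db;
  return 0;
}
```

FIFO compaction is mainly aimed at cache-like or time-series workloads where only the most recent data needs to be retained.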
-## 2.7.0 (01/28/2014) +## 2.7.0 (2014-01-28) ### Public API changes diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/INSTALL.md mariadb-10.11.13/storage/rocksdb/rocksdb/INSTALL.md --- mariadb-10.11.11/storage/rocksdb/rocksdb/INSTALL.md 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/INSTALL.md 2025-05-19 16:14:27.000000000 +0000 @@ -43,6 +43,8 @@ command line flags processing. You can compile rocksdb library even if you don't have gflags installed. +* `make check` will also check code formatting, which requires [clang-format](https://clang.llvm.org/docs/ClangFormat.html) + * If you wish to build the RocksJava static target, then cmake is required for building Snappy. ## Supported platforms @@ -94,12 +96,21 @@ sudo yum install libasan * Install zstandard: + * With [EPEL](https://fedoraproject.org/wiki/EPEL): + + sudo yum install libzstd-devel + + * With CentOS 8: + + sudo dnf install libzstd-devel + + * From source: - wget https://github.com/facebook/zstd/archive/v1.1.3.tar.gz - mv v1.1.3.tar.gz zstd-1.1.3.tar.gz - tar zxvf zstd-1.1.3.tar.gz - cd zstd-1.1.3 - make && sudo make install + wget https://github.com/facebook/zstd/archive/v1.1.3.tar.gz + mv v1.1.3.tar.gz zstd-1.1.3.tar.gz + tar zxvf zstd-1.1.3.tar.gz + cd zstd-1.1.3 + make && sudo make install * **OS X**: * Install latest C++ compiler that supports C++ 11: diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/LANGUAGE-BINDINGS.md mariadb-10.11.13/storage/rocksdb/rocksdb/LANGUAGE-BINDINGS.md --- mariadb-10.11.11/storage/rocksdb/rocksdb/LANGUAGE-BINDINGS.md 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/LANGUAGE-BINDINGS.md 2025-05-19 16:14:27.000000000 +0000 @@ -1,6 +1,6 @@ This is the list of all known third-party language bindings for RocksDB. If something is missing, please open a pull request to add it. -* Java - https://github.com/facebook/rocksdb/tree/master/java +* Java - https://github.com/facebook/rocksdb/tree/main/java * Python * http://python-rocksdb.readthedocs.io/en/latest/ * http://pyrocksdb.readthedocs.org/en/latest/ (unmaintained) @@ -10,7 +10,9 @@ * Ruby - http://rubygems.org/gems/rocksdb-ruby * Haskell - https://hackage.haskell.org/package/rocksdb-haskell * PHP - https://github.com/Photonios/rocksdb-php -* C# - https://github.com/warrenfalk/rocksdb-sharp +* C# + * https://github.com/warrenfalk/rocksdb-sharp + * https://github.com/curiosity-ai/rocksdb-sharp * Rust * https://github.com/pingcap/rust-rocksdb (used in production fork of https://github.com/spacejam/rust-rocksdb) * https://github.com/spacejam/rust-rocksdb diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/Makefile mariadb-10.11.13/storage/rocksdb/rocksdb/Makefile --- mariadb-10.11.11/storage/rocksdb/rocksdb/Makefile 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/Makefile 2025-05-19 16:14:27.000000000 +0000 @@ -8,6 +8,11 @@ BASH_EXISTS := $(shell which bash) SHELL := $(shell which bash) +# Default to python3. Some distros like CentOS 8 do not have `python`. +ifeq ($(origin PYTHON), undefined) + PYTHON := $(shell which python3 || which python || echo python3) +endif +export PYTHON CLEAN_FILES = # deliberately empty, so we can append below. 
CFLAGS += ${EXTRA_CFLAGS} @@ -43,60 +48,43 @@ # Set the default DEBUG_LEVEL to 1 DEBUG_LEVEL?=1 -ifeq ($(MAKECMDGOALS),dbg) - DEBUG_LEVEL=2 -endif - -ifeq ($(MAKECMDGOALS),clean) - DEBUG_LEVEL=0 -endif - -ifeq ($(MAKECMDGOALS),release) - DEBUG_LEVEL=0 -endif +# LIB_MODE says whether or not to use/build "shared" or "static" libraries. +# Mode "static" means to link against static libraries (.a) +# Mode "shared" means to link against shared libraries (.so, .sl, .dylib, etc) +# +# Set the default LIB_MODE to static +LIB_MODE?=static -ifeq ($(MAKECMDGOALS),shared_lib) - DEBUG_LEVEL=0 -endif +# OBJ_DIR is where the object files reside. Default to the current directory +OBJ_DIR?=. -ifeq ($(MAKECMDGOALS),install-shared) - DEBUG_LEVEL=0 -endif +# Check the MAKECMDGOALS to set the DEBUG_LEVEL and LIB_MODE appropriately -ifeq ($(MAKECMDGOALS),static_lib) +ifneq ($(filter clean release install, $(MAKECMDGOALS)),) DEBUG_LEVEL=0 endif - -ifeq ($(MAKECMDGOALS),install-static) +ifneq ($(filter dbg, $(MAKECMDGOALS)),) + DEBUG_LEVEL=2 +else ifneq ($(filter shared_lib install-shared, $(MAKECMDGOALS)),) DEBUG_LEVEL=0 -endif - -ifeq ($(MAKECMDGOALS),install) + LIB_MODE=shared +else ifneq ($(filter static_lib install-static, $(MAKECMDGOALS)),) DEBUG_LEVEL=0 -endif - -ifeq ($(MAKECMDGOALS),rocksdbjavastatic) - ifneq ($(DEBUG_LEVEL),2) - DEBUG_LEVEL=0 - endif -endif - -ifeq ($(MAKECMDGOALS),rocksdbjavastaticrelease) - ifneq ($(DEBUG_LEVEL),2) - DEBUG_LEVEL=0 + LIB_MODE=static +else ifneq ($(filter jtest rocksdbjava%, $(MAKECMDGOALS)),) + OBJ_DIR=jl + LIB_MODE=shared + ifneq ($(findstring rocksdbjavastatic, $(MAKECMDGOALS)),) + OBJ_DIR=jls + ifneq ($(DEBUG_LEVEL),2) + DEBUG_LEVEL=0 + endif + ifeq ($(MAKECMDGOALS),rocksdbjavastaticpublish) + DEBUG_LEVEL=0 + endif endif endif -ifeq ($(MAKECMDGOALS),rocksdbjavastaticreleasedocker) - ifneq ($(DEBUG_LEVEL),2) - DEBUG_LEVEL=0 - endif -endif - -ifeq ($(MAKECMDGOALS),rocksdbjavastaticpublish) - DEBUG_LEVEL=0 -endif - $(info $$DEBUG_LEVEL is ${DEBUG_LEVEL}) # Lite build flag. @@ -116,11 +104,14 @@ # Figure out optimize level. ifneq ($(DEBUG_LEVEL), 2) ifeq ($(LITE), 0) - OPT += -O2 + OPTIMIZE_LEVEL ?= -O2 else - OPT += -Os + OPTIMIZE_LEVEL ?= -Os endif endif +# `OPTIMIZE_LEVEL` is empty when the user does not set it and `DEBUG_LEVEL=2`. +# In that case, the compiler default (`-O0` for gcc and clang) will be used. +OPT += $(OPTIMIZE_LEVEL) # compile with -O2 if debug level is not 2 ifneq ($(DEBUG_LEVEL), 2) @@ -143,10 +134,10 @@ HAVE_POWER8=1 endif -ifeq (,$(shell $(CXX) -fsyntax-only -march=armv8-a+crc+crypto -xc /dev/null 2>&1)) -CXXFLAGS += -march=armv8-a+crc+crypto -CFLAGS += -march=armv8-a+crc+crypto -ARMCRC_SOURCE=1 +# if we're compiling for shared libraries, add the shared flags +ifeq ($(LIB_MODE),shared) +CXXFLAGS += $(PLATFORM_SHARED_CFLAGS) -DROCKSDB_DLL +CFLAGS += $(PLATFORM_SHARED_CFLAGS) -DROCKSDB_DLL endif # if we're compiling for release, compile without debug code (-DNDEBUG) @@ -165,13 +156,35 @@ CXXFLAGS += -fno-rtti endif +ifdef ASSERT_STATUS_CHECKED +# For ASC, turn off constructor elision, preventing the case where a constructor returned +# by a method may pass the ASC check if the status is checked in the inner method. Forcing +# the copy constructor to be invoked disables the optimization and will cause the calling method +# to check the status in order to prevent an error from being raised. 
+PLATFORM_CXXFLAGS += -fno-elide-constructors +ifeq ($(filter -DROCKSDB_ASSERT_STATUS_CHECKED,$(OPT)),) + OPT += -DROCKSDB_ASSERT_STATUS_CHECKED +endif +endif + $(warning Warning: Compiling in debug mode. Don't use the resulting binary in production) endif +# `USE_LTO=1` enables link-time optimizations. Among other things, this enables +# more devirtualization opportunities and inlining across translation units. +# This can save significant overhead introduced by RocksDB's pluggable +# interfaces/internal abstractions, like in the iterator hierarchy. It works +# better when combined with profile-guided optimizations (not currently +# supported natively in Makefile). +ifeq ($(USE_LTO), 1) + CXXFLAGS += -flto + LDFLAGS += -flto -fuse-linker-plugin +endif + #----------------------------------------------- include src.mk -AM_DEFAULT_VERBOSITY = 0 +AM_DEFAULT_VERBOSITY ?= 0 AM_V_GEN = $(am__v_GEN_$(V)) am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) @@ -186,12 +199,16 @@ am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY)) am__v_CC_0 = @echo " CC " $@; am__v_CC_1 = -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ + AM_V_CCLD = $(am__v_CCLD_$(V)) am__v_CCLD_ = $(am__v_CCLD_$(AM_DEFAULT_VERBOSITY)) +ifneq ($(SKIP_LINK), 1) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = +else +am__v_CCLD_0 = @echo " !CCLD " $@; true skip +am__v_CCLD_1 = true skip +endif AM_V_AR = $(am__v_AR_$(V)) am__v_AR_ = $(am__v_AR_$(AM_DEFAULT_VERBOSITY)) am__v_AR_0 = @echo " AR " $@; @@ -199,15 +216,66 @@ ifdef ROCKSDB_USE_LIBRADOS LIB_SOURCES += utilities/env_librados.cc +TEST_MAIN_SOURCES += utilities/env_librados_test.cc LDFLAGS += -lrados endif -AM_LINK = $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -# detect what platform we're building on -dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; export PORTABLE="$(PORTABLE)"; "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk")) +AM_LINK = $(AM_V_CCLD)$(CXX) -L. $(patsubst lib%.a, -l%, $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^)) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) +AM_SHARE = $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$@ -L. $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^) $(LDFLAGS) -o $@ + +# Detect what platform we're building on. +# Export some common variables that might have been passed as Make variables +# instead of environment variables. 
+dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \ + export CXXFLAGS="$(EXTRA_CXXFLAGS)"; \ + export LDFLAGS="$(EXTRA_LDFLAGS)"; \ + export COMPILE_WITH_ASAN="$(COMPILE_WITH_ASAN)"; \ + export COMPILE_WITH_TSAN="$(COMPILE_WITH_TSAN)"; \ + export COMPILE_WITH_UBSAN="$(COMPILE_WITH_UBSAN)"; \ + export PORTABLE="$(PORTABLE)"; \ + export ROCKSDB_NO_FBCODE="$(ROCKSDB_NO_FBCODE)"; \ + export USE_CLANG="$(USE_CLANG)"; \ + "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk")) # this file is generated by the previous line to set build flags and sources include make_config.mk -CLEAN_FILES += make_config.mk + +ROCKSDB_PLUGIN_MKS = $(foreach plugin, $(ROCKSDB_PLUGINS), plugin/$(plugin)/*.mk) +include $(ROCKSDB_PLUGIN_MKS) +ROCKSDB_PLUGIN_SOURCES = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach source, $($(plugin)_SOURCES), plugin/$(plugin)/$(source))) +ROCKSDB_PLUGIN_HEADERS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach header, $($(plugin)_HEADERS), plugin/$(plugin)/$(header))) +ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES = $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_PKGCONFIG_REQUIRES)) +PLATFORM_LDFLAGS += $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_LDFLAGS)) +CXXFLAGS += $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_CXXFLAGS)) + +ifneq ($(strip $(ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES)),) +LDFLAGS := $(LDFLAGS) $(shell pkg-config --libs $(ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES)) +ifneq ($(.SHELLSTATUS),0) +$(error pkg-config failed) +endif +CXXFLAGS := $(CXXFLAGS) $(shell pkg-config --cflags $(ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES)) +ifneq ($(.SHELLSTATUS),0) +$(error pkg-config failed) +endif +endif + +CXXFLAGS += $(ARCHFLAG) + +ifeq (,$(shell $(CXX) -fsyntax-only -march=armv8-a+crc+crypto -xc /dev/null 2>&1)) +ifneq ($(PLATFORM),OS_MACOSX) +CXXFLAGS += -march=armv8-a+crc+crypto +CFLAGS += -march=armv8-a+crc+crypto +ARMCRC_SOURCE=1 +endif +endif + +export JAVAC_ARGS +CLEAN_FILES += make_config.mk rocksdb.pc + +ifeq ($(V), 1) +$(info $(shell uname -a)) +$(info $(shell $(CC) --version)) +$(info $(shell $(CXX) --version)) +endif missing_make_config_paths := $(shell \ grep "\./\S*\|/\S*" -o $(CURDIR)/make_config.mk | \ @@ -216,7 +284,7 @@ done | sort | uniq) $(foreach path, $(missing_make_config_paths), \ - $(warning Warning: $(path) dont exist)) + $(warning Warning: $(path) does not exist)) ifeq ($(PLATFORM), OS_AIX) # no debug info @@ -244,12 +312,37 @@ LUA_PATH = endif +ifeq ($(LIB_MODE),shared) +# So that binaries are executable from build location, in addition to install location +EXEC_LDFLAGS += -Wl,-rpath -Wl,'$$ORIGIN' +endif + +ifeq ($(PLATFORM), OS_MACOSX) +ifeq ($(ARCHFLAG), -arch arm64) +ifneq ($(MACHINE), arm64) +# If we're building on a non-arm64 machine but targeting arm64 Mac, we need to disable +# linking with jemalloc (as it won't be arm64-compatible) and remove some other options +# set during platform detection +DISABLE_JEMALLOC=1 +PLATFORM_CFLAGS := $(filter-out -march=native -DHAVE_SSE42 -DHAVE_AVX2, $(PLATFORM_CFLAGS)) +PLATFORM_CXXFLAGS := $(filter-out -march=native -DHAVE_SSE42 -DHAVE_AVX2, $(PLATFORM_CXXFLAGS)) +endif +endif +endif + # ASAN doesn't work well with jemalloc. If we're compiling with ASAN, we should use regular malloc. 
ifdef COMPILE_WITH_ASAN DISABLE_JEMALLOC=1 EXEC_LDFLAGS += -fsanitize=address PLATFORM_CCFLAGS += -fsanitize=address PLATFORM_CXXFLAGS += -fsanitize=address +ifeq ($(LIB_MODE),shared) +ifdef USE_CLANG +# Fix false ODR violation; see https://github.com/google/sanitizers/issues/1017 + EXEC_LDFLAGS += -mllvm -asan-use-private-alias=1 + PLATFORM_CXXFLAGS += -mllvm -asan-use-private-alias=1 +endif +endif endif # TSAN doesn't work well with jemalloc. If we're compiling with TSAN, we should use regular malloc. @@ -289,6 +382,12 @@ PLATFORM_CCFLAGS += -DROCKSDB_VALGRIND_RUN PLATFORM_CXXFLAGS += -DROCKSDB_VALGRIND_RUN endif +ifdef ROCKSDB_FULL_VALGRIND_RUN + # Some tests are slow when run under valgrind and are only run when + # explicitly requested via the ROCKSDB_FULL_VALGRIND_RUN compiler flag. + PLATFORM_CCFLAGS += -DROCKSDB_VALGRIND_RUN -DROCKSDB_FULL_VALGRIND_RUN + PLATFORM_CXXFLAGS += -DROCKSDB_VALGRIND_RUN -DROCKSDB_FULL_VALGRIND_RUN +endif ifndef DISABLE_JEMALLOC ifdef JEMALLOC @@ -308,9 +407,14 @@ USE_FOLLY_DISTRIBUTED_MUTEX=0 endif -export GTEST_THROW_ON_FAILURE=1 -export GTEST_HAS_EXCEPTIONS=1 -GTEST_DIR = ./third-party/gtest-1.8.1/fused-src +ifndef GTEST_THROW_ON_FAILURE + export GTEST_THROW_ON_FAILURE=1 +endif +ifndef GTEST_HAS_EXCEPTIONS + export GTEST_HAS_EXCEPTIONS=1 +endif + +GTEST_DIR = third-party/gtest-1.8.1/fused-src # AIX: pre-defined system headers are surrounded by an extern "C" block ifeq ($(PLATFORM), OS_AIX) PLATFORM_CCFLAGS += -I$(GTEST_DIR) @@ -336,6 +440,14 @@ PLATFORM_CCFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE) PLATFORM_CXXFLAGS += -DTEST_CACHE_LINE_SIZE=$(TEST_CACHE_LINE_SIZE) endif +ifdef TEST_UINT128_COMPAT + PLATFORM_CCFLAGS += -DTEST_UINT128_COMPAT=1 + PLATFORM_CXXFLAGS += -DTEST_UINT128_COMPAT=1 +endif +ifdef ROCKSDB_MODIFY_NPHASH + PLATFORM_CCFLAGS += -DROCKSDB_MODIFY_NPHASH=1 + PLATFORM_CXXFLAGS += -DROCKSDB_MODIFY_NPHASH=1 +endif # This (the first rule) must depend on "all". default: all @@ -343,6 +455,15 @@ WARNING_FLAGS = -W -Wextra -Wall -Wsign-compare -Wshadow \ -Wunused-parameter +ifeq (,$(filter amd64, $(MACHINE))) + C_WARNING_FLAGS = -Wstrict-prototypes +endif + +ifdef USE_CLANG + # Used by some teams in Facebook + WARNING_FLAGS += -Wshift-sign-overflow +endif + ifeq ($(PLATFORM), OS_OPENBSD) WARNING_FLAGS += -Wno-unused-lambda-capture endif @@ -382,69 +503,113 @@ CXXFLAGS += -DNO_THREEWAY_CRC32C endif -CFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) +CFLAGS += $(C_WARNING_FLAGS) $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) CXXFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers LDFLAGS += $(PLATFORM_LDFLAGS) -# If NO_UPDATE_BUILD_VERSION is set we don't update util/build_version.cc, but -# the file needs to already exist or else the build will fail -ifndef NO_UPDATE_BUILD_VERSION -date := $(shell date +%F) -ifdef FORCE_GIT_SHA - git_sha := $(FORCE_GIT_SHA) -else - git_sha := $(shell git rev-parse HEAD 2>/dev/null) -endif -gen_build_version = sed -e s/@@GIT_SHA@@/$(git_sha)/ -e s/@@GIT_DATE_TIME@@/$(date)/ util/build_version.cc.in - -# Record the version of the source that we are compiling. -# We keep a record of the git revision in this file. It is then built -# as a regular source file as part of the compilation process. -# One can run "strings executable_filename | grep _build_" to find -# the version of the source that we used to build the executable file. 
-FORCE: -util/build_version.cc: FORCE - $(AM_V_GEN)rm -f $@-t - $(AM_V_at)$(gen_build_version) > $@-t - $(AM_V_at)if test -f $@; then \ - cmp -s $@-t $@ && rm -f $@-t || mv -f $@-t $@; \ - else mv -f $@-t $@; fi -endif - -LIBOBJECTS = $(LIB_SOURCES:.cc=.o) +LIB_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(LIB_SOURCES)) +LIB_OBJECTS += $(patsubst %.cc, $(OBJ_DIR)/%.o, $(ROCKSDB_PLUGIN_SOURCES)) ifeq ($(HAVE_POWER8),1) -LIB_CC_OBJECTS = $(LIB_SOURCES:.cc=.o) -LIBOBJECTS += $(LIB_SOURCES_C:.c=.o) -LIBOBJECTS += $(LIB_SOURCES_ASM:.S=.o) -else -LIB_CC_OBJECTS = $(LIB_SOURCES:.cc=.o) +LIB_OBJECTS += $(patsubst %.c, $(OBJ_DIR)/%.o, $(LIB_SOURCES_C)) +LIB_OBJECTS += $(patsubst %.S, $(OBJ_DIR)/%.o, $(LIB_SOURCES_ASM)) endif -LIBOBJECTS += $(TOOL_LIB_SOURCES:.cc=.o) -MOCKOBJECTS = $(MOCK_LIB_SOURCES:.cc=.o) ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1) - FOLLYOBJECTS = $(FOLLY_SOURCES:.cpp=.o) + LIB_OBJECTS += $(patsubst %.cpp, $(OBJ_DIR)/%.o, $(FOLLY_SOURCES)) +endif + +# range_tree is not compatible with non GNU libc on ppc64 +# see https://jira.percona.com/browse/PS-7559 +ifneq ($(PPC_LIBC_IS_GNU),0) + LIB_OBJECTS += $(patsubst %.cc, $(OBJ_DIR)/%.o, $(RANGE_TREE_SOURCES)) endif -GTEST = $(GTEST_DIR)/gtest/gtest-all.o -TESTUTIL = ./test_util/testutil.o -TESTHARNESS = ./test_util/testharness.o $(TESTUTIL) $(MOCKOBJECTS) $(GTEST) +GTEST = $(OBJ_DIR)/$(GTEST_DIR)/gtest/gtest-all.o +TESTUTIL = $(OBJ_DIR)/test_util/testutil.o +TESTHARNESS = $(OBJ_DIR)/test_util/testharness.o $(TESTUTIL) $(GTEST) VALGRIND_ERROR = 2 VALGRIND_VER := $(join $(VALGRIND_VER),valgrind) VALGRIND_OPTS = --error-exitcode=$(VALGRIND_ERROR) --leak-check=full +# Not yet supported: --show-leak-kinds=definite,possible,reachable --errors-for-leak-kinds=definite,possible,reachable -BENCHTOOLOBJECTS = $(BENCH_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) +TEST_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(TEST_LIB_SOURCES) $(MOCK_LIB_SOURCES)) $(GTEST) +BENCH_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(BENCH_LIB_SOURCES)) +CACHE_BENCH_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(CACHE_BENCH_LIB_SOURCES)) +TOOL_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(TOOL_LIB_SOURCES)) +ANALYZE_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(ANALYZER_LIB_SOURCES)) +STRESS_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(STRESS_LIB_SOURCES)) + +# Exclude build_version.cc -- a generated source file -- from all sources. 
Not needed for dependencies +ALL_SOURCES = $(filter-out util/build_version.cc, $(LIB_SOURCES)) $(TEST_LIB_SOURCES) $(MOCK_LIB_SOURCES) $(GTEST_DIR)/gtest/gtest-all.cc +ALL_SOURCES += $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(CACHE_BENCH_LIB_SOURCES) $(ANALYZER_LIB_SOURCES) $(STRESS_LIB_SOURCES) +ALL_SOURCES += $(TEST_MAIN_SOURCES) $(TOOL_MAIN_SOURCES) $(BENCH_MAIN_SOURCES) +ALL_SOURCES += $(ROCKSDB_PLUGIN_SOURCES) -TESTS = $(patsubst %.cc, %, $(notdir $(TEST_MAIN_SOURCES))) -TESTS += $(patsubst %.c, %, $(notdir $(TEST_MAIN_SOURCES_C))) -ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1) - TESTS += folly_synchronization_distributed_mutex_test - ALL_SOURCES += third-party/folly/folly/synchronization/test/DistributedMutexTest.cc -endif + +# `make check-headers` to verify that each header file includes its own +# dependencies ifneq ($(filter check-headers, $(MAKECMDGOALS)),) +# TODO: add/support JNI headers + DEV_HEADER_DIRS := $(sort include/ hdfs/ $(dir $(ALL_SOURCES))) +# Some headers like in port/ are platform-specific + DEV_HEADERS := $(shell $(FIND) $(DEV_HEADER_DIRS) -type f -name '*.h' | egrep -v 'port/|plugin/|lua/|range_tree/|tools/rdb/db_wrapper.h|include/rocksdb/utilities/env_librados.h') +else + DEV_HEADERS := +endif +HEADER_OK_FILES = $(patsubst %.h, %.h.ok, $(DEV_HEADERS)) + +AM_V_CCH = $(am__v_CCH_$(V)) +am__v_CCH_ = $(am__v_CCH_$(AM_DEFAULT_VERBOSITY)) +am__v_CCH_0 = @echo " CC.h " $<; +am__v_CCH_1 = + +%.h.ok: %.h # .h.ok not actually created, so re-checked on each invocation +# -DROCKSDB_NAMESPACE=42 ensures the namespace header is included + $(AM_V_CCH) echo '#include "$<"' | $(CXX) $(CXXFLAGS) -DROCKSDB_NAMESPACE=42 -x c++ -c - -o /dev/null + +check-headers: $(HEADER_OK_FILES) + +# options_settable_test doesn't pass with UBSAN as we use hack in the test ifdef COMPILE_WITH_UBSAN TESTS := $(shell echo $(TESTS) | sed 's/\boptions_settable_test\b//g') endif +ifdef ASSERT_STATUS_CHECKED + # TODO: finish fixing all tests to pass this check + TESTS_FAILING_ASC = \ + c_test \ + env_test \ + range_locking_test \ + testutil_test \ + + # Since we have very few ASC exclusions left, excluding them from + # the build is the most convenient way to exclude them from testing + TESTS := $(filter-out $(TESTS_FAILING_ASC),$(TESTS)) +endif + +ROCKSDBTESTS_SUBSET ?= $(TESTS) + +# env_test - suspicious use of test::TmpDir +# deletefile_test - serial because it generates giant temporary files in +# its various tests.
Parallel can fill up your /dev/shm +NON_PARALLEL_TEST = \ + env_test \ + deletefile_test \ -EXPOBJECTS = $(LIBOBJECTS) $(TESTUTIL) +PARALLEL_TEST = $(filter-out $(NON_PARALLEL_TEST), $(TESTS)) -TESTS = \ +# Not necessarily well thought out or up-to-date, but matches old list +TESTS_PLATFORM_DEPENDENT := \ db_basic_test \ + db_blob_basic_test \ db_encryption_test \ db_test2 \ external_sst_file_basic_test \ @@ -459,220 +624,115 @@ env_basic_test \ env_test \ env_logger_test \ + io_posix_test \ hash_test \ random_test \ + ribbon_test \ thread_local_test \ + work_queue_test \ rate_limiter_test \ perf_context_test \ iostats_context_test \ db_wal_test \ - db_block_cache_test \ - db_test \ - db_blob_index_test \ - db_iter_test \ - db_iter_stress_test \ - db_log_iter_test \ - db_bloom_filter_test \ - db_compaction_filter_test \ - db_compaction_test \ - db_dynamic_level_test \ - db_flush_test \ - db_inplace_update_test \ - db_iterator_test \ - db_memtable_test \ - db_merge_operator_test \ - db_merge_operand_test \ - db_options_test \ - db_range_del_test \ - db_secondary_test \ - db_sst_test \ - db_tailing_iter_test \ - db_io_failure_test \ - db_properties_test \ - db_table_properties_test \ - db_statistics_test \ - db_write_test \ - error_handler_test \ - autovector_test \ - blob_db_test \ - cleanable_test \ - column_family_test \ - table_properties_collector_test \ - arena_test \ - block_test \ - data_block_hash_index_test \ - cache_test \ - corruption_test \ - slice_test \ - slice_transform_test \ - dbformat_test \ - fault_injection_test \ - filelock_test \ - filename_test \ - file_reader_writer_test \ - block_based_filter_block_test \ - full_filter_block_test \ - partitioned_filter_block_test \ - hash_table_test \ - histogram_test \ - log_test \ - manual_compaction_test \ - mock_env_test \ - memtable_list_test \ - merge_helper_test \ - memory_test \ - merge_test \ - merger_test \ - util_merge_operators_test \ - options_file_test \ - reduce_levels_test \ - plain_table_db_test \ - comparator_db_test \ - external_sst_file_test \ - import_column_family_test \ - prefix_test \ - skiplist_test \ - write_buffer_manager_test \ - stringappend_test \ - cassandra_format_test \ - cassandra_functional_test \ - cassandra_row_merge_test \ - cassandra_serialize_test \ - ttl_test \ - backupable_db_test \ - cache_simulator_test \ - sim_cache_test \ - version_edit_test \ - version_set_test \ - compaction_picker_test \ - version_builder_test \ - file_indexer_test \ - write_batch_test \ - write_batch_with_index_test \ - write_controller_test\ - deletefile_test \ - obsolete_files_test \ - table_test \ - delete_scheduler_test \ - options_test \ - options_settable_test \ - options_util_test \ - event_logger_test \ - timer_queue_test \ - cuckoo_table_builder_test \ - cuckoo_table_reader_test \ - cuckoo_table_db_test \ - flush_job_test \ - wal_manager_test \ - listener_test \ - compaction_iterator_test \ - compaction_job_test \ - thread_list_test \ - sst_dump_test \ - compact_files_test \ - optimistic_transaction_test \ - write_callback_test \ - heap_test \ - compact_on_deletion_collector_test \ - compaction_job_stats_test \ - option_change_migration_test \ - transaction_test \ - ldb_cmd_test \ - persistent_cache_test \ - statistics_test \ - stats_history_test \ - lru_cache_test \ - object_registry_test \ - repair_test \ - env_timed_test \ - write_prepared_transaction_test \ - write_unprepared_transaction_test \ - db_universal_compaction_test \ - trace_analyzer_test \ - repeatable_thread_test \ - 
range_tombstone_fragmenter_test \ - range_del_aggregator_test \ - sst_file_reader_test \ - db_secondary_test \ - block_cache_tracer_test \ - block_cache_trace_analyzer_test \ - defer_test \ - -ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1) - TESTS += folly_synchronization_distributed_mutex_test -endif -PARALLEL_TEST = \ - backupable_db_test \ - db_bloom_filter_test \ - db_compaction_filter_test \ - db_compaction_test \ - db_merge_operator_test \ - db_sst_test \ - db_test \ - db_universal_compaction_test \ - db_wal_test \ - external_sst_file_test \ - import_column_family_test \ - fault_injection_test \ - file_reader_writer_test \ - inlineskiplist_test \ - manual_compaction_test \ - persistent_cache_test \ - table_test \ - transaction_test \ - write_prepared_transaction_test \ - write_unprepared_transaction_test \ +# Sort ROCKSDBTESTS_SUBSET for filtering, except db_test is special (expensive) +# so is placed first (out-of-order) +ROCKSDBTESTS_SUBSET := $(filter db_test, $(ROCKSDBTESTS_SUBSET)) $(sort $(filter-out db_test, $(ROCKSDBTESTS_SUBSET))) -# options_settable_test doesn't pass with UBSAN as we use hack in the test -ifdef COMPILE_WITH_UBSAN - TESTS := $(shell echo $(TESTS) | sed 's/\boptions_settable_test\b//g') -endif -SUBSET := $(TESTS) ifdef ROCKSDBTESTS_START - SUBSET := $(shell echo $(SUBSET) | sed 's/^.*$(ROCKSDBTESTS_START)/$(ROCKSDBTESTS_START)/') + ROCKSDBTESTS_SUBSET := $(shell echo $(ROCKSDBTESTS_SUBSET) | sed 's/^.*$(ROCKSDBTESTS_START)/$(ROCKSDBTESTS_START)/') endif ifdef ROCKSDBTESTS_END - SUBSET := $(shell echo $(SUBSET) | sed 's/$(ROCKSDBTESTS_END).*//') + ROCKSDBTESTS_SUBSET := $(shell echo $(ROCKSDBTESTS_SUBSET) | sed 's/$(ROCKSDBTESTS_END).*//') endif -TOOLS = \ - sst_dump \ - db_sanity_test \ - db_stress \ - write_stress \ - ldb \ - db_repl_stress \ - rocksdb_dump \ - rocksdb_undump \ - blob_dump \ - trace_analyzer \ - block_cache_trace_analyzer \ +ifeq ($(ROCKSDBTESTS_PLATFORM_DEPENDENT), only) + ROCKSDBTESTS_SUBSET := $(filter $(TESTS_PLATFORM_DEPENDENT), $(ROCKSDBTESTS_SUBSET)) +else ifeq ($(ROCKSDBTESTS_PLATFORM_DEPENDENT), exclude) + ROCKSDBTESTS_SUBSET := $(filter-out $(TESTS_PLATFORM_DEPENDENT), $(ROCKSDBTESTS_SUBSET)) +endif + +# bench_tool_analyzer main is in bench_tool_analyzer_tool, or this would be simpler... +TOOLS = $(patsubst %.cc, %, $(notdir $(patsubst %_tool.cc, %.cc, $(TOOLS_MAIN_SOURCES)))) TEST_LIBS = \ librocksdb_env_basic_test.a # TODO: add back forward_iterator_bench, after making it build in all environments.
-BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench filter_bench persistent_cache_bench range_del_aggregator_bench +BENCHMARKS = $(patsubst %.cc, %, $(notdir $(BENCH_MAIN_SOURCES))) + +MICROBENCHS = $(patsubst %.cc, %, $(notdir $(MICROBENCH_SOURCES))) # if user didn't config LIBNAME, set the default ifeq ($(LIBNAME),) + LIBNAME=librocksdb # we should only run rocksdb in production with DEBUG_LEVEL 0 -ifeq ($(DEBUG_LEVEL),0) - LIBNAME=librocksdb -else - LIBNAME=librocksdb_debug +ifneq ($(DEBUG_LEVEL),0) + LIBDEBUG=_debug endif endif -LIBRARY = ${LIBNAME}.a -TOOLS_LIBRARY = ${LIBNAME}_tools.a -STRESS_LIBRARY = ${LIBNAME}_stress.a +STATIC_LIBRARY = ${LIBNAME}$(LIBDEBUG).a +STATIC_TEST_LIBRARY = ${LIBNAME}_test$(LIBDEBUG).a +STATIC_TOOLS_LIBRARY = ${LIBNAME}_tools$(LIBDEBUG).a +STATIC_STRESS_LIBRARY = ${LIBNAME}_stress$(LIBDEBUG).a + +ALL_STATIC_LIBS = $(STATIC_LIBRARY) $(STATIC_TEST_LIBRARY) $(STATIC_TOOLS_LIBRARY) $(STATIC_STRESS_LIBRARY) + +SHARED_TEST_LIBRARY = ${LIBNAME}_test$(LIBDEBUG).$(PLATFORM_SHARED_EXT) +SHARED_TOOLS_LIBRARY = ${LIBNAME}_tools$(LIBDEBUG).$(PLATFORM_SHARED_EXT) +SHARED_STRESS_LIBRARY = ${LIBNAME}_stress$(LIBDEBUG).$(PLATFORM_SHARED_EXT) + +ALL_SHARED_LIBS = $(SHARED1) $(SHARED2) $(SHARED3) $(SHARED4) $(SHARED_TEST_LIBRARY) $(SHARED_TOOLS_LIBRARY) $(SHARED_STRESS_LIBRARY) + +ifeq ($(LIB_MODE),shared) +LIBRARY=$(SHARED1) +TEST_LIBRARY=$(SHARED_TEST_LIBRARY) +TOOLS_LIBRARY=$(SHARED_TOOLS_LIBRARY) +STRESS_LIBRARY=$(SHARED_STRESS_LIBRARY) +CLOUD_LIBRARY=$(SHARED_CLOUD_LIBRARY) +else +LIBRARY=$(STATIC_LIBRARY) +TEST_LIBRARY=$(STATIC_TEST_LIBRARY) +TOOLS_LIBRARY=$(STATIC_TOOLS_LIBRARY) +endif +STRESS_LIBRARY=$(STATIC_STRESS_LIBRARY) ROCKSDB_MAJOR = $(shell egrep "ROCKSDB_MAJOR.[0-9]" include/rocksdb/version.h | cut -d ' ' -f 3) ROCKSDB_MINOR = $(shell egrep "ROCKSDB_MINOR.[0-9]" include/rocksdb/version.h | cut -d ' ' -f 3) ROCKSDB_PATCH = $(shell egrep "ROCKSDB_PATCH.[0-9]" include/rocksdb/version.h | cut -d ' ' -f 3) +# If NO_UPDATE_BUILD_VERSION is set we don't update util/build_version.cc, but +# the file needs to already exist or else the build will fail +ifndef NO_UPDATE_BUILD_VERSION + +# By default, use the current date-time as the date. If there are no changes, +# we will use the last commit date instead. +build_date := $(shell date "+%Y-%m-%d %T") + +ifdef FORCE_GIT_SHA + git_sha := $(FORCE_GIT_SHA) + git_mod := 1 + git_date := $(build_date) +else + git_sha := $(shell git rev-parse HEAD 2>/dev/null) + git_tag := $(shell git symbolic-ref -q --short HEAD 2> /dev/null || git describe --tags --exact-match 2>/dev/null) + git_mod := $(shell git diff-index HEAD --quiet 2>/dev/null; echo $$?) + git_date := $(shell git log -1 --date=format:"%Y-%m-%d %T" --format="%ad" 2>/dev/null) +endif +gen_build_version = sed -e s/@GIT_SHA@/$(git_sha)/ -e s:@GIT_TAG@:"$(git_tag)": -e s/@GIT_MOD@/"$(git_mod)"/ -e s/@BUILD_DATE@/"$(build_date)"/ -e s/@GIT_DATE@/"$(git_date)"/ util/build_version.cc.in + +# Record the version of the source that we are compiling. +# We keep a record of the git revision in this file. It is then built +# as a regular source file as part of the compilation process. +# One can run "strings executable_filename | grep _build_" to find +# the version of the source that we used to build the executable file. 
+util/build_version.cc: $(filter-out $(OBJ_DIR)/util/build_version.o, $(LIB_OBJECTS)) util/build_version.cc.in + $(AM_V_GEN)rm -f $@-t + $(AM_V_at)$(gen_build_version) > $@ +endif +CLEAN_FILES += util/build_version.cc + default: all #----------------------------------------------- @@ -681,7 +741,7 @@ ifneq ($(PLATFORM_SHARED_EXT),) ifneq ($(PLATFORM_SHARED_VERSIONED),true) -SHARED1 = ${LIBNAME}.$(PLATFORM_SHARED_EXT) +SHARED1 = ${LIBNAME}$(LIBDEBUG).$(PLATFORM_SHARED_EXT) SHARED2 = $(SHARED1) SHARED3 = $(SHARED1) SHARED4 = $(SHARED1) @@ -692,7 +752,7 @@ SHARED_PATCH = $(ROCKSDB_PATCH) SHARED1 = ${LIBNAME}.$(PLATFORM_SHARED_EXT) ifeq ($(PLATFORM), OS_MACOSX) -SHARED_OSX = $(LIBNAME).$(SHARED_MAJOR) +SHARED_OSX = $(LIBNAME)$(LIBDEBUG).$(SHARED_MAJOR) SHARED2 = $(SHARED_OSX).$(PLATFORM_SHARED_EXT) SHARED3 = $(SHARED_OSX).$(SHARED_MINOR).$(PLATFORM_SHARED_EXT) SHARED4 = $(SHARED_OSX).$(SHARED_MINOR).$(SHARED_PATCH).$(PLATFORM_SHARED_EXT) @@ -700,61 +760,35 @@ SHARED2 = $(SHARED1).$(SHARED_MAJOR) SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR) SHARED4 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR).$(SHARED_PATCH) -endif +endif # MACOSX SHARED = $(SHARED1) $(SHARED2) $(SHARED3) $(SHARED4) -$(SHARED1): $(SHARED4) +$(SHARED1): $(SHARED4) $(SHARED2) ln -fs $(SHARED4) $(SHARED1) -$(SHARED2): $(SHARED4) +$(SHARED2): $(SHARED4) $(SHARED3) ln -fs $(SHARED4) $(SHARED2) $(SHARED3): $(SHARED4) ln -fs $(SHARED4) $(SHARED3) -endif -ifeq ($(HAVE_POWER8),1) -SHARED_C_OBJECTS = $(LIB_SOURCES_C:.c=.o) -SHARED_ASM_OBJECTS = $(LIB_SOURCES_ASM:.S=.o) -SHARED_C_LIBOBJECTS = $(patsubst %.o,shared-objects/%.o,$(SHARED_C_OBJECTS)) -SHARED_ASM_LIBOBJECTS = $(patsubst %.o,shared-objects/%.o,$(SHARED_ASM_OBJECTS)) -shared_libobjects = $(patsubst %,shared-objects/%,$(LIB_CC_OBJECTS)) -else -shared_libobjects = $(patsubst %,shared-objects/%,$(LIBOBJECTS)) -endif - -CLEAN_FILES += shared-objects -shared_all_libobjects = $(shared_libobjects) - -ifeq ($(HAVE_POWER8),1) -shared-ppc-objects = $(SHARED_C_LIBOBJECTS) $(SHARED_ASM_LIBOBJECTS) - -shared-objects/util/crc32c_ppc.o: util/crc32c_ppc.c - $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ - -shared-objects/util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S - $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ -endif -$(shared_libobjects): shared-objects/%.o: %.cc - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@ - -ifeq ($(HAVE_POWER8),1) -shared_all_libobjects = $(shared_libobjects) $(shared-ppc-objects) -endif -$(SHARED4): $(shared_all_libobjects) - $(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(shared_all_libobjects) $(LDFLAGS) -o $@ +endif # PLATFORM_SHARED_VERSIONED +$(SHARED4): $(LIB_OBJECTS) + $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(LIB_OBJECTS) $(LDFLAGS) -o $@ endif # PLATFORM_SHARED_EXT .PHONY: blackbox_crash_test check clean coverage crash_test ldb_tests package \ release tags tags0 valgrind_check whitebox_crash_test format static_lib shared_lib all \ - dbg rocksdbjavastatic rocksdbjava install install-static install-shared uninstall \ - analyze tools tools_lib \ + dbg rocksdbjavastatic rocksdbjava gen-pc install install-static install-shared uninstall \ + analyze tools tools_lib check-headers \ blackbox_crash_test_with_atomic_flush whitebox_crash_test_with_atomic_flush \ - blackbox_crash_test_with_txn whitebox_crash_test_with_txn + blackbox_crash_test_with_txn whitebox_crash_test_with_txn \ + blackbox_crash_test_with_best_efforts_recovery \ + blackbox_crash_test_with_ts whitebox_crash_test_with_ts 
all: $(LIBRARY) $(BENCHMARKS) tools tools_lib test_libs $(TESTS) -all_but_some_tests: $(LIBRARY) $(BENCHMARKS) tools tools_lib test_libs $(SUBSET) +all_but_some_tests: $(LIBRARY) $(BENCHMARKS) tools tools_lib test_libs $(ROCKSDBTESTS_SUBSET) -static_lib: $(LIBRARY) +static_lib: $(STATIC_LIBRARY) shared_lib: $(SHARED) @@ -766,19 +800,22 @@ test_libs: $(TEST_LIBS) +benchmarks: $(BENCHMARKS) + +microbench: $(MICROBENCHS) + for t in $(MICROBENCHS); do echo "===== Running benchmark $$t (`date`)"; ./$$t || exit 1; done; + dbg: $(LIBRARY) $(BENCHMARKS) tools $(TESTS) -# creates static library and programs -release: - $(MAKE) clean - DEBUG_LEVEL=0 $(MAKE) static_lib tools db_bench +# creates library and programs +release: clean + LIB_MODE=$(LIB_MODE) DEBUG_LEVEL=0 $(MAKE) $(LIBRARY) tools db_bench -coverage: - $(MAKE) clean +coverage: clean COVERAGEFLAGS="-fprofile-arcs -ftest-coverage" LDFLAGS+="-lgcov" $(MAKE) J=1 all check cd coverage && ./coverage_test.sh # Delete intermediate files - $(FIND) . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm {} \; + $(FIND) . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm -f {} \; ifneq (,$(filter check parallel_check,$(MAKECMDGOALS)),) # Use /dev/shm if it has the sticky bit set (otherwise, /tmp), @@ -824,14 +861,11 @@ $(parallel_tests): $(PARALLEL_TEST) $(AM_V_at)TEST_BINARY=$(patsubst parallel_%,%,$@); \ TEST_NAMES=` \ - ./$$TEST_BINARY --gtest_list_tests \ - | perl -n \ - -e 's/ *\#.*//;' \ - -e '/^(\s*)(\S+)/; !$$1 and do {$$p=$$2; break};' \ - -e 'print qq! $$p$$2!'`; \ + (./$$TEST_BINARY --gtest_list_tests || echo " $${TEST_BINARY}__list_tests_failure") \ + | awk '/^[^ ]/ { prefix = $$1 } /^[ ]/ { print prefix $$1 }'`; \ + echo " Generating parallel test scripts for $$TEST_BINARY"; \ for TEST_NAME in $$TEST_NAMES; do \ TEST_SCRIPT=t/run-$$TEST_BINARY-$${TEST_NAME//\//-}; \ - echo " GEN " $$TEST_SCRIPT; \ printf '%s\n' \ '#!/bin/sh' \ "d=\$(TMPD)$$TEST_SCRIPT" \ @@ -843,7 +877,7 @@ gen_parallel_tests: $(AM_V_at)mkdir -p t - $(AM_V_at)rm -f t/run-* + $(AM_V_at)$(FIND) t -type f -name 'run-*' -exec rm -f {} \; $(MAKE) $(parallel_tests) # Reorder input lines (which are one per test) so that the @@ -863,7 +897,7 @@ # 107.816 PASS t/DBTest.EncodeDecompressedBlockSizeTest # slow_test_regexp = \ - ^.*SnapshotConcurrentAccessTest.*$$|^t/run-table_test-HarnessTest.Randomized$$|^t/run-db_test-.*(?:FileCreationRandomFailure|EncodeDecompressedBlockSizeTest)$$|^.*RecoverFromCorruptedWALWithoutFlush$$ + ^.*SnapshotConcurrentAccessTest.*$$|^.*SeqAdvanceConcurrentTest.*$$|^t/run-table_test-HarnessTest.Randomized$$|^t/run-db_test-.*(?:FileCreationRandomFailure|EncodeDecompressedBlockSizeTest)$$|^.*RecoverFromCorruptedWALWithoutFlush$$ prioritize_long_running_tests = \ perl -pe 's,($(slow_test_regexp)),100 $$1,' \ | sort -k1,1gr \ @@ -878,6 +912,19 @@ # Use this regexp to select the subset of tests whose names match. tests-regexp = . +EXCLUDE_TESTS_REGEX ?= "^$$" + +ifeq ($(PRINT_PARALLEL_OUTPUTS), 1) + parallel_redir = +else ifeq ($(QUIET_PARALLEL_TESTS), 1) + parallel_redir = >& t/$(test_log_prefix)log-{/} +else +# Default: print failure output only, as it happens +# Note: gnu_parallel --eta is now always used, but has been modified to provide +# only infrequent updates when not connected to a terminal. (CircleCI will +# kill a job if no output for 10min.) + parallel_redir = >& t/$(test_log_prefix)log-{/} || bash -c "cat t/$(test_log_prefix)log-{/}; exit $$?" 
+endif .PHONY: check_0 check_0: @@ -885,34 +932,38 @@ printf '%s\n' '' \ 'To monitor subtest ,' \ ' run "make watch-log" in a separate window' ''; \ - test -t 1 && eta=--eta || eta=; \ { \ printf './%s\n' $(filter-out $(PARALLEL_TEST),$(TESTS)); \ find t -name 'run-*' -print; \ } \ | $(prioritize_long_running_tests) \ | grep -E '$(tests-regexp)' \ - | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG $$eta --gnu '{} >& t/log-{/}' + | grep -E -v '$(EXCLUDE_TESTS_REGEX)' \ + | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG --eta --gnu '{} $(parallel_redir)' ; \ + parallel_retcode=$$? ; \ + awk '{ if ($$7 != 0 || $$8 != 0) { if ($$7 == "Exitval") { h = $$0; } else { if (!f) print h; print; f = 1 } } } END { if(f) exit 1; }' < LOG ; \ + awk_retcode=$$?; \ + if [ $$parallel_retcode -ne 0 ] || [ $$awk_retcode -ne 0 ] ; then exit 1 ; fi -valgrind-blacklist-regexp = InlineSkipTest.ConcurrentInsert|TransactionStressTest.DeadlockStress|DBCompactionTest.SuggestCompactRangeNoTwoLevel0Compactions|BackupableDBTest.RateLimiting|DBTest.CloseSpeedup|DBTest.ThreadStatusFlush|DBTest.RateLimitingTest|DBTest.EncodeDecompressedBlockSizeTest|FaultInjectionTest.UninstalledCompaction|HarnessTest.Randomized|ExternalSSTFileTest.CompactDuringAddFileRandom|ExternalSSTFileTest.IngestFileWithGlobalSeqnoRandomized|MySQLStyleTransactionTest.TransactionStressTest +valgrind-exclude-regexp = InlineSkipTest.ConcurrentInsert|TransactionStressTest.DeadlockStress|DBCompactionTest.SuggestCompactRangeNoTwoLevel0Compactions|BackupableDBTest.RateLimiting|DBTest.CloseSpeedup|DBTest.ThreadStatusFlush|DBTest.RateLimitingTest|DBTest.EncodeDecompressedBlockSizeTest|FaultInjectionTest.UninstalledCompaction|HarnessTest.Randomized|ExternalSSTFileTest.CompactDuringAddFileRandom|ExternalSSTFileTest.IngestFileWithGlobalSeqnoRandomized|MySQLStyleTransactionTest.TransactionStressTest .PHONY: valgrind_check_0 +valgrind_check_0: test_log_prefix := valgrind_ valgrind_check_0: $(AM_V_GEN)export TEST_TMPDIR=$(TMPD); \ printf '%s\n' '' \ 'To monitor subtest ,' \ ' run "make watch-log" in a separate window' ''; \ - test -t 1 && eta=--eta || eta=; \ { \ printf './%s\n' $(filter-out $(PARALLEL_TEST) %skiplist_test options_settable_test, $(TESTS)); \ find t -name 'run-*' -print; \ } \ | $(prioritize_long_running_tests) \ | grep -E '$(tests-regexp)' \ - | grep -E -v '$(valgrind-blacklist-regexp)' \ - | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG $$eta --gnu \ - '(if [[ "{}" == "./"* ]] ; then $(DRIVER) {}; else {}; fi) ' \ - '>& t/valgrind_log-{/}' + | grep -E -v '$(valgrind-exclude-regexp)' \ + | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG --eta --gnu \ + '(if [[ "{}" == "./"* ]] ; then $(DRIVER) {}; else {}; fi) \ + $(parallel_redir)' \ CLEAN_FILES += t LOG $(TMPD) @@ -926,6 +977,9 @@ watch-log: $(WATCH) --interval=0 'sort -k7,7nr -k4,4gr LOG|$(quoted_perl_command)' +dump-log: + bash -c '$(quoted_perl_command)' < LOG + # If J != 1 and GNU parallel is installed, run the tests in parallel, # via the check_0 rule above. Otherwise, run them sequentially. 
check: all @@ -937,102 +991,160 @@ $(MAKE) T="$$t" TMPD=$(TMPD) check_0; \ else \ for t in $(TESTS); do \ - echo "===== Running $$t"; ./$$t || exit 1; done; \ + echo "===== Running $$t (`date`)"; ./$$t || exit 1; done; \ fi rm -rf $(TMPD) ifneq ($(PLATFORM), OS_AIX) - python tools/check_all_python.py + $(PYTHON) tools/check_all_python.py ifeq ($(filter -DROCKSDB_LITE,$(OPT)),) - python tools/ldb_test.py +ifndef ASSERT_STATUS_CHECKED # not yet working with these tests + $(PYTHON) tools/ldb_test.py sh tools/rocksdb_dump_test.sh endif endif +endif +ifndef SKIP_FORMAT_BUCK_CHECKS + $(MAKE) check-format + $(MAKE) check-buck-targets + $(MAKE) check-sources +endif # TODO add ldb_tests -check_some: $(SUBSET) - for t in $(SUBSET); do echo "===== Running $$t"; ./$$t || exit 1; done +check_some: $(ROCKSDBTESTS_SUBSET) + for t in $(ROCKSDBTESTS_SUBSET); do echo "===== Running $$t (`date`)"; ./$$t || exit 1; done .PHONY: ldb_tests ldb_tests: ldb - python tools/ldb_test.py - -crash_test: whitebox_crash_test blackbox_crash_test + $(PYTHON) tools/ldb_test.py -crash_test_with_atomic_flush: whitebox_crash_test_with_atomic_flush blackbox_crash_test_with_atomic_flush - -crash_test_with_txn: whitebox_crash_test_with_txn blackbox_crash_test_with_txn +crash_test: +# Do not parallelize + $(MAKE) whitebox_crash_test + $(MAKE) blackbox_crash_test + +crash_test_with_atomic_flush: +# Do not parallelize + $(MAKE) whitebox_crash_test_with_atomic_flush + $(MAKE) blackbox_crash_test_with_atomic_flush + +crash_test_with_txn: +# Do not parallelize + $(MAKE) whitebox_crash_test_with_txn + $(MAKE) blackbox_crash_test_with_txn + +crash_test_with_best_efforts_recovery: blackbox_crash_test_with_best_efforts_recovery + +crash_test_with_ts: +# Do not parallelize + $(MAKE) whitebox_crash_test_with_ts + $(MAKE) blackbox_crash_test_with_ts blackbox_crash_test: db_stress - python -u tools/db_crashtest.py --simple blackbox $(CRASH_TEST_EXT_ARGS) - python -u tools/db_crashtest.py blackbox $(CRASH_TEST_EXT_ARGS) + $(PYTHON) -u tools/db_crashtest.py --simple blackbox $(CRASH_TEST_EXT_ARGS) + $(PYTHON) -u tools/db_crashtest.py blackbox $(CRASH_TEST_EXT_ARGS) blackbox_crash_test_with_atomic_flush: db_stress - python -u tools/db_crashtest.py --cf_consistency blackbox $(CRASH_TEST_EXT_ARGS) + $(PYTHON) -u tools/db_crashtest.py --cf_consistency blackbox $(CRASH_TEST_EXT_ARGS) blackbox_crash_test_with_txn: db_stress - python -u tools/db_crashtest.py --txn blackbox $(CRASH_TEST_EXT_ARGS) + $(PYTHON) -u tools/db_crashtest.py --txn blackbox $(CRASH_TEST_EXT_ARGS) + +blackbox_crash_test_with_best_efforts_recovery: db_stress + $(PYTHON) -u tools/db_crashtest.py --test_best_efforts_recovery blackbox $(CRASH_TEST_EXT_ARGS) + +blackbox_crash_test_with_ts: db_stress + $(PYTHON) -u tools/db_crashtest.py --enable_ts blackbox $(CRASH_TEST_EXT_ARGS) ifeq ($(CRASH_TEST_KILL_ODD),) CRASH_TEST_KILL_ODD=888887 endif whitebox_crash_test: db_stress - python -u tools/db_crashtest.py --simple whitebox --random_kill_odd \ + $(PYTHON) -u tools/db_crashtest.py --simple whitebox --random_kill_odd \ $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS) - python -u tools/db_crashtest.py whitebox --random_kill_odd \ + $(PYTHON) -u tools/db_crashtest.py whitebox --random_kill_odd \ $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS) whitebox_crash_test_with_atomic_flush: db_stress - python -u tools/db_crashtest.py --cf_consistency whitebox --random_kill_odd \ + $(PYTHON) -u tools/db_crashtest.py --cf_consistency whitebox --random_kill_odd \ $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS) 
whitebox_crash_test_with_txn: db_stress - python -u tools/db_crashtest.py --txn whitebox --random_kill_odd \ + $(PYTHON) -u tools/db_crashtest.py --txn whitebox --random_kill_odd \ $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS) -asan_check: - $(MAKE) clean +whitebox_crash_test_with_ts: db_stress + $(PYTHON) -u tools/db_crashtest.py --enable_ts whitebox --random_kill_odd \ + $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS) + +asan_check: clean COMPILE_WITH_ASAN=1 $(MAKE) check -j32 $(MAKE) clean -asan_crash_test: - $(MAKE) clean +asan_crash_test: clean COMPILE_WITH_ASAN=1 $(MAKE) crash_test $(MAKE) clean -asan_crash_test_with_atomic_flush: +whitebox_asan_crash_test: clean + COMPILE_WITH_ASAN=1 $(MAKE) whitebox_crash_test $(MAKE) clean - COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_atomic_flush + +blackbox_asan_crash_test: clean + COMPILE_WITH_ASAN=1 $(MAKE) blackbox_crash_test $(MAKE) clean -asan_crash_test_with_txn: +asan_crash_test_with_atomic_flush: clean + COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_atomic_flush $(MAKE) clean + +asan_crash_test_with_txn: clean COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_txn $(MAKE) clean -ubsan_check: +asan_crash_test_with_best_efforts_recovery: clean + COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_best_efforts_recovery $(MAKE) clean + +ubsan_check: clean COMPILE_WITH_UBSAN=1 $(MAKE) check -j32 $(MAKE) clean -ubsan_crash_test: - $(MAKE) clean +ubsan_crash_test: clean COMPILE_WITH_UBSAN=1 $(MAKE) crash_test $(MAKE) clean -ubsan_crash_test_with_atomic_flush: +whitebox_ubsan_crash_test: clean + COMPILE_WITH_UBSAN=1 $(MAKE) whitebox_crash_test $(MAKE) clean - COMPILE_WITH_UBSAN=1 $(MAKE) crash_test_with_atomic_flush + +blackbox_ubsan_crash_test: clean + COMPILE_WITH_UBSAN=1 $(MAKE) blackbox_crash_test $(MAKE) clean -ubsan_crash_test_with_txn: +ubsan_crash_test_with_atomic_flush: clean + COMPILE_WITH_UBSAN=1 $(MAKE) crash_test_with_atomic_flush $(MAKE) clean + +ubsan_crash_test_with_txn: clean COMPILE_WITH_UBSAN=1 $(MAKE) crash_test_with_txn $(MAKE) clean +ubsan_crash_test_with_best_efforts_recovery: clean + COMPILE_WITH_UBSAN=1 $(MAKE) crash_test_with_best_efforts_recovery + $(MAKE) clean + +full_valgrind_test: + ROCKSDB_FULL_VALGRIND_RUN=1 DISABLE_JEMALLOC=1 $(MAKE) valgrind_check + +full_valgrind_test_some: + ROCKSDB_FULL_VALGRIND_RUN=1 DISABLE_JEMALLOC=1 $(MAKE) valgrind_check_some + valgrind_test: ROCKSDB_VALGRIND_RUN=1 DISABLE_JEMALLOC=1 $(MAKE) valgrind_check +valgrind_test_some: + ROCKSDB_VALGRIND_RUN=1 DISABLE_JEMALLOC=1 $(MAKE) valgrind_check_some + valgrind_check: $(TESTS) $(MAKE) DRIVER="$(VALGRIND_VER) $(VALGRIND_OPTS)" gen_parallel_tests $(AM_V_GEN)if test "$(J)" != 1 \ @@ -1051,12 +1163,20 @@ done; \ fi +valgrind_check_some: $(ROCKSDBTESTS_SUBSET) + for t in $(ROCKSDBTESTS_SUBSET); do \ + $(VALGRIND_VER) $(VALGRIND_OPTS) ./$$t; \ + ret_code=$$?; \ + if [ $$ret_code -ne 0 ]; then \ + exit $$ret_code; \ + fi; \ + done ifneq ($(PAR_TEST),) parloop: ret_bad=0; \ for t in $(PAR_TEST); do \ - echo "===== Running $$t in parallel $(NUM_PAR)";\ + echo "===== Running $$t in parallel $(NUM_PAR) (`date`)";\ if [ $(db_test) -eq 1 ]; then \ seq $(J) | v="$$t" build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{}; export TEST_TMPDIR=$$s;' \ 'timeout 2m ./db_test --gtest_filter=$$v >> $$s/log-{} 2>1'; \ @@ -1108,22 +1228,22 @@ $(MAKE) dbg CLEAN_FILES += unity.cc -unity.cc: Makefile +unity.cc: Makefile util/build_version.cc.in rm -f $@ $@-t + $(AM_V_at)$(gen_build_version) > util/build_version.cc for source_file in $(LIB_SOURCES); do \ echo "#include 
\"$$source_file\"" >> $@-t; \ done chmod a=r $@-t mv $@-t $@ -unity.a: unity.o +unity.a: $(OBJ_DIR)/unity.o $(AM_V_AR)rm -f $@ - $(AM_V_at)$(AR) $(ARFLAGS) $@ unity.o + $(AM_V_at)$(AR) $(ARFLAGS) $@ $(OBJ_DIR)/unity.o -TOOLLIBOBJECTS = $(TOOL_LIB_SOURCES:.cc=.o) # try compiling db_test with unity -unity_test: db/db_test.o db/db_test_util.o $(TESTHARNESS) $(TOOLLIBOBJECTS) unity.a +unity_test: $(OBJ_DIR)/db/db_basic_test.o $(OBJ_DIR)/db/db_test_util.o $(TEST_OBJECTS) $(TOOL_OBJECTS) unity.a $(AM_LINK) ./unity_test @@ -1135,12 +1255,15 @@ clean-not-downloaded: clean-ext-libraries-bin clean-rocks clean-not-downloaded-rocksjava clean-rocks: - rm -f $(BENCHMARKS) $(TOOLS) $(TESTS) $(PARALLEL_TEST) $(LIBRARY) $(SHARED) + echo shared=$(ALL_SHARED_LIBS) + echo static=$(ALL_STATIC_LIBS) + rm -f $(BENCHMARKS) $(TOOLS) $(TESTS) $(PARALLEL_TEST) $(ALL_STATIC_LIBS) $(ALL_SHARED_LIBS) $(MICROBENCHS) rm -rf $(CLEAN_FILES) ios-x86 ios-arm scan_build_report $(FIND) . -name "*.[oda]" -exec rm -f {} \; - $(FIND) . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm {} \; + $(FIND) . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm -f {} \; clean-rocksjava: + rm -rf jl jls cd java && $(MAKE) clean clean-not-downloaded-rocksjava: @@ -1167,603 +1290,769 @@ format: build_tools/format-diff.sh +check-format: + build_tools/format-diff.sh -c + +check-buck-targets: + buckifier/check_buck_targets.sh + +check-sources: + build_tools/check-sources.sh + package: bash build_tools/make_package.sh $(SHARED_MAJOR).$(SHARED_MINOR) # --------------------------------------------------------------------------- # Unit tests and tools # --------------------------------------------------------------------------- -$(LIBRARY): $(LIBOBJECTS) - $(AM_V_AR)rm -f $@ - $(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIBOBJECTS) +$(STATIC_LIBRARY): $(LIB_OBJECTS) + $(AM_V_AR)rm -f $@ $(SHARED1) $(SHARED2) $(SHARED3) $(SHARED4) + $(AM_V_at)$(AR) $(ARFLAGS) $@ $(LIB_OBJECTS) -$(TOOLS_LIBRARY): $(BENCH_LIB_SOURCES:.cc=.o) $(TOOL_LIB_SOURCES:.cc=.o) $(LIB_SOURCES:.cc=.o) $(TESTUTIL) $(ANALYZER_LIB_SOURCES:.cc=.o) - $(AM_V_AR)rm -f $@ +$(STATIC_TEST_LIBRARY): $(TEST_OBJECTS) + $(AM_V_AR)rm -f $@ $(SHARED_TEST_LIBRARY) $(AM_V_at)$(AR) $(ARFLAGS) $@ $^ -$(STRESS_LIBRARY): $(LIB_SOURCES:.cc=.o) $(TESTUTIL) $(ANALYZER_LIB_SOURCES:.cc=.o) $(STRESS_LIB_SOURCES:.cc=.o) - $(AM_V_AR)rm -f $@ +$(STATIC_TOOLS_LIBRARY): $(TOOL_OBJECTS) + $(AM_V_AR)rm -f $@ $(SHARED_TOOLS_LIBRARY) $(AM_V_at)$(AR) $(ARFLAGS) $@ $^ -librocksdb_env_basic_test.a: env/env_basic_test.o $(LIBOBJECTS) $(TESTHARNESS) +$(STATIC_STRESS_LIBRARY): $(ANALYZE_OBJECTS) $(STRESS_OBJECTS) $(TESTUTIL) + $(AM_V_AR)rm -f $@ $(SHARED_STRESS_LIBRARY) + $(AM_V_at)$(AR) $(ARFLAGS) $@ $^ + +$(SHARED_TEST_LIBRARY): $(TEST_OBJECTS) $(SHARED1) + $(AM_V_AR)rm -f $@ $(STATIC_TEST_LIBRARY) + $(AM_SHARE) + +$(SHARED_TOOLS_LIBRARY): $(TOOL_OBJECTS) $(SHARED1) + $(AM_V_AR)rm -f $@ $(STATIC_TOOLS_LIBRARY) + $(AM_SHARE) + +$(SHARED_STRESS_LIBRARY): $(ANALYZE_OBJECTS) $(STRESS_OBJECTS) $(TESTUTIL) $(SHARED_TOOLS_LIBRARY) $(SHARED1) + $(AM_V_AR)rm -f $@ $(STATIC_STRESS_LIBRARY) + $(AM_SHARE) + +librocksdb_env_basic_test.a: $(OBJ_DIR)/env/env_basic_test.o $(LIB_OBJECTS) $(TESTHARNESS) $(AM_V_AR)rm -f $@ $(AM_V_at)$(AR) $(ARFLAGS) $@ $^ -db_bench: tools/db_bench.o $(BENCHTOOLOBJECTS) +db_bench: $(OBJ_DIR)/tools/db_bench.o $(BENCH_OBJECTS) $(TESTUTIL) $(LIBRARY) $(AM_LINK) -trace_analyzer: tools/trace_analyzer.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS) +trace_analyzer: $(OBJ_DIR)/tools/trace_analyzer.o $(ANALYZE_OBJECTS) 
$(TOOLS_LIBRARY) $(LIBRARY) $(AM_LINK) -block_cache_trace_analyzer: tools/block_cache_analyzer/block_cache_trace_analyzer_tool.o $(ANALYZETOOLOBJECTS) $(LIBOBJECTS) +block_cache_trace_analyzer: $(OBJ_DIR)/tools/block_cache_analyzer/block_cache_trace_analyzer_tool.o $(ANALYZE_OBJECTS) $(TOOLS_LIBRARY) $(LIBRARY) $(AM_LINK) ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1) -folly_synchronization_distributed_mutex_test: $(LIBOBJECTS) $(TESTHARNESS) $(FOLLYOBJECTS) third-party/folly/folly/synchronization/test/DistributedMutexTest.o +folly_synchronization_distributed_mutex_test: $(OBJ_DIR)/third-party/folly/folly/synchronization/test/DistributedMutexTest.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) endif -cache_bench: cache/cache_bench.o $(LIBOBJECTS) $(TESTUTIL) +cache_bench: $(OBJ_DIR)/cache/cache_bench.o $(CACHE_BENCH_OBJECTS) $(LIBRARY) + $(AM_LINK) + +persistent_cache_bench: $(OBJ_DIR)/utilities/persistent_cache/persistent_cache_bench.o $(LIBRARY) + $(AM_LINK) + +memtablerep_bench: $(OBJ_DIR)/memtable/memtablerep_bench.o $(LIBRARY) + $(AM_LINK) + +filter_bench: $(OBJ_DIR)/util/filter_bench.o $(LIBRARY) $(AM_LINK) -persistent_cache_bench: utilities/persistent_cache/persistent_cache_bench.o $(LIBOBJECTS) $(TESTUTIL) +db_stress: $(OBJ_DIR)/db_stress_tool/db_stress.o $(STRESS_LIBRARY) $(TOOLS_LIBRARY) $(LIBRARY) $(AM_LINK) -memtablerep_bench: memtable/memtablerep_bench.o $(LIBOBJECTS) $(TESTUTIL) +write_stress: $(OBJ_DIR)/tools/write_stress.o $(LIBRARY) $(AM_LINK) -filter_bench: util/filter_bench.o $(LIBOBJECTS) $(TESTUTIL) +db_sanity_test: $(OBJ_DIR)/tools/db_sanity_test.o $(LIBRARY) $(AM_LINK) -db_stress: db_stress_tool/db_stress.o $(STRESSTOOLOBJECTS) +db_repl_stress: $(OBJ_DIR)/tools/db_repl_stress.o $(LIBRARY) $(AM_LINK) -write_stress: tools/write_stress.o $(LIBOBJECTS) $(TESTUTIL) +arena_test: $(OBJ_DIR)/memory/arena_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_sanity_test: tools/db_sanity_test.o $(LIBOBJECTS) $(TESTUTIL) +memory_allocator_test: memory/memory_allocator_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_repl_stress: tools/db_repl_stress.o $(LIBOBJECTS) $(TESTUTIL) +autovector_test: $(OBJ_DIR)/util/autovector_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -arena_test: memory/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) +column_family_test: $(OBJ_DIR)/db/column_family_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -autovector_test: util/autovector_test.o $(LIBOBJECTS) $(TESTHARNESS) +table_properties_collector_test: $(OBJ_DIR)/db/table_properties_collector_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -column_family_test: db/column_family_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +bloom_test: $(OBJ_DIR)/util/bloom_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -table_properties_collector_test: db/table_properties_collector_test.o $(LIBOBJECTS) $(TESTHARNESS) +dynamic_bloom_test: $(OBJ_DIR)/util/dynamic_bloom_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) +c_test: $(OBJ_DIR)/db/c_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -dynamic_bloom_test: util/dynamic_bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) +cache_test: $(OBJ_DIR)/cache/cache_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) +coding_test: $(OBJ_DIR)/util/coding_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cache_test: cache/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) +hash_test: $(OBJ_DIR)/util/hash_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) +random_test: 
$(OBJ_DIR)/util/random_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) +ribbon_test: $(OBJ_DIR)/util/ribbon_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -random_test: util/random_test.o $(LIBOBJECTS) $(TESTHARNESS) +option_change_migration_test: $(OBJ_DIR)/utilities/option_change_migration/option_change_migration_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -option_change_migration_test: utilities/option_change_migration/option_change_migration_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +stringappend_test: $(OBJ_DIR)/utilities/merge_operators/string_append/stringappend_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -stringappend_test: utilities/merge_operators/string_append/stringappend_test.o $(LIBOBJECTS) $(TESTHARNESS) +cassandra_format_test: $(OBJ_DIR)/utilities/cassandra/cassandra_format_test.o $(OBJ_DIR)/utilities/cassandra/test_utils.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cassandra_format_test: utilities/cassandra/cassandra_format_test.o utilities/cassandra/test_utils.o $(LIBOBJECTS) $(TESTHARNESS) +cassandra_functional_test: $(OBJ_DIR)/utilities/cassandra/cassandra_functional_test.o $(OBJ_DIR)/utilities/cassandra/test_utils.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cassandra_functional_test: utilities/cassandra/cassandra_functional_test.o utilities/cassandra/test_utils.o $(LIBOBJECTS) $(TESTHARNESS) +cassandra_row_merge_test: $(OBJ_DIR)/utilities/cassandra/cassandra_row_merge_test.o $(OBJ_DIR)/utilities/cassandra/test_utils.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cassandra_row_merge_test: utilities/cassandra/cassandra_row_merge_test.o utilities/cassandra/test_utils.o $(LIBOBJECTS) $(TESTHARNESS) +cassandra_serialize_test: $(OBJ_DIR)/utilities/cassandra/cassandra_serialize_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cassandra_serialize_test: utilities/cassandra/cassandra_serialize_test.o $(LIBOBJECTS) $(TESTHARNESS) +hash_table_test: $(OBJ_DIR)/utilities/persistent_cache/hash_table_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -hash_table_test: utilities/persistent_cache/hash_table_test.o $(LIBOBJECTS) $(TESTHARNESS) +histogram_test: $(OBJ_DIR)/monitoring/histogram_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -histogram_test: monitoring/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) +thread_local_test: $(OBJ_DIR)/util/thread_local_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -thread_local_test: util/thread_local_test.o $(LIBOBJECTS) $(TESTHARNESS) +work_queue_test: $(OBJ_DIR)/util/work_queue_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -corruption_test: db/corruption_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +corruption_test: $(OBJ_DIR)/db/corruption_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) +crc32c_test: $(OBJ_DIR)/util/crc32c_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -slice_test: util/slice_test.o $(LIBOBJECTS) $(TESTHARNESS) +slice_test: $(OBJ_DIR)/util/slice_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -slice_transform_test: util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS) +slice_transform_test: $(OBJ_DIR)/util/slice_transform_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_basic_test: db/db_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_basic_test: $(OBJ_DIR)/db/db_basic_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_encryption_test: db/db_encryption_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_blob_basic_test: $(OBJ_DIR)/db/blob/db_blob_basic_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) 
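From here to the end of the test-rule block, every target is rewritten to one shape: the test's own object under $(OBJ_DIR), then $(TEST_LIBRARY) and $(LIBRARY), linked by the $(AM_LINK) macro. Shared fixtures such as db/db_test_util.o disappear from the individual rules because they now live inside $(TEST_LIBRARY). The explicit recipes being deleted (for example the old perf_context_test rule further down, `$(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)`) show roughly what $(AM_LINK) stands for. A condensed sketch of the shape, assuming $(AM_LINK) links all prerequisites ($^) into the target ($@); `some_test` is a hypothetical name, and the macro body below is modeled on the removed recipes rather than copied from the real definition:

# Assumed expansion, modeled on the removed explicit recipes:
AM_LINK = $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)

# One object per test + the common test/static libraries:
some_test: $(OBJ_DIR)/db/some_test.o $(TEST_LIBRARY) $(LIBRARY)
	$(AM_LINK)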
-db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_blob_compaction_test: $(OBJ_DIR)/db/blob/db_blob_compaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_test2: db/db_test2.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_with_timestamp_basic_test: $(OBJ_DIR)/db/db_with_timestamp_basic_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_blob_index_test: db/db_blob_index_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_with_timestamp_compaction_test: db/db_with_timestamp_compaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_block_cache_test: db/db_block_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_encryption_test: $(OBJ_DIR)/db/db_encryption_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_bloom_filter_test: db/db_bloom_filter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_test: $(OBJ_DIR)/db/db_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_log_iter_test: db/db_log_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_test2: $(OBJ_DIR)/db/db_test2.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_compaction_filter_test: db/db_compaction_filter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_logical_block_size_cache_test: $(OBJ_DIR)/db/db_logical_block_size_cache_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_compaction_test: db/db_compaction_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_blob_index_test: $(OBJ_DIR)/db/blob/db_blob_index_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_dynamic_level_test: db/db_dynamic_level_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_block_cache_test: $(OBJ_DIR)/db/db_block_cache_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_flush_test: db/db_flush_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_bloom_filter_test: $(OBJ_DIR)/db/db_bloom_filter_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_inplace_update_test: db/db_inplace_update_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_log_iter_test: $(OBJ_DIR)/db/db_log_iter_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_iterator_test: db/db_iterator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_compaction_filter_test: $(OBJ_DIR)/db/db_compaction_filter_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_memtable_test: db/db_memtable_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_compaction_test: $(OBJ_DIR)/db/db_compaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_merge_operator_test: db/db_merge_operator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_dynamic_level_test: $(OBJ_DIR)/db/db_dynamic_level_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_merge_operand_test: db/db_merge_operand_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_flush_test: $(OBJ_DIR)/db/db_flush_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_options_test: db/db_options_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_inplace_update_test: $(OBJ_DIR)/db/db_inplace_update_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_range_del_test: db/db_range_del_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_iterator_test: $(OBJ_DIR)/db/db_iterator_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_sst_test: db/db_sst_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_kv_checksum_test: $(OBJ_DIR)/db/db_kv_checksum_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_statistics_test: db/db_statistics_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_memtable_test: $(OBJ_DIR)/db/db_memtable_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) 
-db_write_test: db/db_write_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_merge_operator_test: $(OBJ_DIR)/db/db_merge_operator_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -error_handler_test: db/error_handler_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_merge_operand_test: $(OBJ_DIR)/db/db_merge_operand_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -external_sst_file_basic_test: db/external_sst_file_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_options_test: $(OBJ_DIR)/db/db_options_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -external_sst_file_test: db/external_sst_file_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_range_del_test: $(OBJ_DIR)/db/db_range_del_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -import_column_family_test: db/import_column_family_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_sst_test: $(OBJ_DIR)/db/db_sst_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_tailing_iter_test: db/db_tailing_iter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_statistics_test: $(OBJ_DIR)/db/db_statistics_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_iter_test: db/db_iter_test.o $(LIBOBJECTS) $(TESTHARNESS) +db_write_test: $(OBJ_DIR)/db/db_write_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_iter_stress_test: db/db_iter_stress_test.o $(LIBOBJECTS) $(TESTHARNESS) +error_handler_fs_test: $(OBJ_DIR)/db/error_handler_fs_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_universal_compaction_test: db/db_universal_compaction_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +external_sst_file_basic_test: $(OBJ_DIR)/db/external_sst_file_basic_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_wal_test: db/db_wal_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +external_sst_file_test: $(OBJ_DIR)/db/external_sst_file_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_io_failure_test: db/db_io_failure_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +import_column_family_test: $(OBJ_DIR)/db/import_column_family_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_properties_test: db/db_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_tailing_iter_test: $(OBJ_DIR)/db/db_tailing_iter_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_table_properties_test: db/db_table_properties_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +db_iter_test: $(OBJ_DIR)/db/db_iter_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) +db_iter_stress_test: $(OBJ_DIR)/db/db_iter_stress_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_universal_compaction_test: $(OBJ_DIR)/db/db_universal_compaction_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_wal_test: $(OBJ_DIR)/db/db_wal_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_io_failure_test: $(OBJ_DIR)/db/db_io_failure_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_properties_test: $(OBJ_DIR)/db/db_properties_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_table_properties_test: $(OBJ_DIR)/db/db_table_properties_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +log_write_bench: $(OBJ_DIR)/util/log_write_bench.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) $(PROFILING_FLAGS) -plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) +plain_table_db_test: $(OBJ_DIR)/db/plain_table_db_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -comparator_db_test: db/comparator_db_test.o $(LIBOBJECTS) $(TESTHARNESS) +comparator_db_test: $(OBJ_DIR)/db/comparator_db_test.o $(TEST_LIBRARY) 
$(LIBRARY) $(AM_LINK) -table_reader_bench: table/table_reader_bench.o $(LIBOBJECTS) $(TESTHARNESS) +table_reader_bench: $(OBJ_DIR)/table/table_reader_bench.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) $(PROFILING_FLAGS) -perf_context_test: db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) +perf_context_test: $(OBJ_DIR)/db/perf_context_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) -prefix_test: db/prefix_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) +prefix_test: $(OBJ_DIR)/db/prefix_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) -backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TESTHARNESS) +backupable_db_test: $(OBJ_DIR)/utilities/backupable/backupable_db_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -checkpoint_test: utilities/checkpoint/checkpoint_test.o $(LIBOBJECTS) $(TESTHARNESS) +checkpoint_test: $(OBJ_DIR)/utilities/checkpoint/checkpoint_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cache_simulator_test: utilities/simulator_cache/cache_simulator_test.o $(LIBOBJECTS) $(TESTHARNESS) +cache_simulator_test: $(OBJ_DIR)/utilities/simulator_cache/cache_simulator_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +sim_cache_test: $(OBJ_DIR)/utilities/simulator_cache/sim_cache_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -env_mirror_test: utilities/env_mirror_test.o $(LIBOBJECTS) $(TESTHARNESS) +env_mirror_test: $(OBJ_DIR)/utilities/env_mirror_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -env_timed_test: utilities/env_timed_test.o $(LIBOBJECTS) $(TESTHARNESS) +env_timed_test: $(OBJ_DIR)/utilities/env_timed_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) ifdef ROCKSDB_USE_LIBRADOS -env_librados_test: utilities/env_librados_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) +env_librados_test: $(OBJ_DIR)/utilities/env_librados_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) endif -object_registry_test: utilities/object_registry_test.o $(LIBOBJECTS) $(TESTHARNESS) +object_registry_test: $(OBJ_DIR)/utilities/object_registry_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +ttl_test: $(OBJ_DIR)/utilities/ttl/ttl_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +write_batch_with_index_test: $(OBJ_DIR)/utilities/write_batch_with_index/write_batch_with_index_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +flush_job_test: $(OBJ_DIR)/db/flush_job_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +compaction_iterator_test: $(OBJ_DIR)/db/compaction/compaction_iterator_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +compaction_job_test: $(OBJ_DIR)/db/compaction/compaction_job_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +compaction_job_stats_test: $(OBJ_DIR)/db/compaction/compaction_job_stats_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +compaction_service_test: $(OBJ_DIR)/db/compaction/compaction_service_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +compact_on_deletion_collector_test: $(OBJ_DIR)/utilities/table_properties_collectors/compact_on_deletion_collector_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +wal_manager_test: $(OBJ_DIR)/db/wal_manager_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -ttl_test: utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) +wal_edit_test: $(OBJ_DIR)/db/wal_edit_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -write_batch_with_index_test: 
utilities/write_batch_with_index/write_batch_with_index_test.o $(LIBOBJECTS) $(TESTHARNESS) +dbformat_test: $(OBJ_DIR)/db/dbformat_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -flush_job_test: db/flush_job_test.o $(LIBOBJECTS) $(TESTHARNESS) +env_basic_test: $(OBJ_DIR)/env/env_basic_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -compaction_iterator_test: db/compaction/compaction_iterator_test.o $(LIBOBJECTS) $(TESTHARNESS) +env_test: $(OBJ_DIR)/env/env_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -compaction_job_test: db/compaction/compaction_job_test.o $(LIBOBJECTS) $(TESTHARNESS) +io_posix_test: $(OBJ_DIR)/env/io_posix_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -compaction_job_stats_test: db/compaction/compaction_job_stats_test.o $(LIBOBJECTS) $(TESTHARNESS) +fault_injection_test: $(OBJ_DIR)/db/fault_injection_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -compact_on_deletion_collector_test: utilities/table_properties_collectors/compact_on_deletion_collector_test.o $(LIBOBJECTS) $(TESTHARNESS) +rate_limiter_test: $(OBJ_DIR)/util/rate_limiter_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -wal_manager_test: db/wal_manager_test.o $(LIBOBJECTS) $(TESTHARNESS) +delete_scheduler_test: $(OBJ_DIR)/file/delete_scheduler_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) +filename_test: $(OBJ_DIR)/db/filename_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -env_basic_test: env/env_basic_test.o $(LIBOBJECTS) $(TESTHARNESS) +random_access_file_reader_test: $(OBJ_DIR)/file/random_access_file_reader_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -env_test: env/env_test.o $(LIBOBJECTS) $(TESTHARNESS) +file_reader_writer_test: $(OBJ_DIR)/util/file_reader_writer_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -fault_injection_test: db/fault_injection_test.o $(LIBOBJECTS) $(TESTHARNESS) +block_based_filter_block_test: $(OBJ_DIR)/table/block_based/block_based_filter_block_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -rate_limiter_test: util/rate_limiter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +block_based_table_reader_test: table/block_based/block_based_table_reader_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -delete_scheduler_test: file/delete_scheduler_test.o $(LIBOBJECTS) $(TESTHARNESS) +full_filter_block_test: $(OBJ_DIR)/table/block_based/full_filter_block_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) +partitioned_filter_block_test: $(OBJ_DIR)/table/block_based/partitioned_filter_block_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -file_reader_writer_test: util/file_reader_writer_test.o $(LIBOBJECTS) $(TESTHARNESS) +log_test: $(OBJ_DIR)/db/log_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -block_based_filter_block_test: table/block_based/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) +cleanable_test: $(OBJ_DIR)/table/cleanable_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -full_filter_block_test: table/block_based/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) +table_test: $(OBJ_DIR)/table/table_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -partitioned_filter_block_test: table/block_based/partitioned_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) +block_fetcher_test: table/block_fetcher_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) +block_test: $(OBJ_DIR)/table/block_based/block_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cleanable_test: table/cleanable_test.o $(LIBOBJECTS) $(TESTHARNESS) 
+data_block_hash_index_test: $(OBJ_DIR)/table/block_based/data_block_hash_index_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) +inlineskiplist_test: $(OBJ_DIR)/memtable/inlineskiplist_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -block_test: table/block_based/block_test.o $(LIBOBJECTS) $(TESTHARNESS) +skiplist_test: $(OBJ_DIR)/memtable/skiplist_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -data_block_hash_index_test: table/block_based/data_block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS) +write_buffer_manager_test: $(OBJ_DIR)/memtable/write_buffer_manager_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -inlineskiplist_test: memtable/inlineskiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) +version_edit_test: $(OBJ_DIR)/db/version_edit_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -skiplist_test: memtable/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) +version_set_test: $(OBJ_DIR)/db/version_set_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -write_buffer_manager_test: memtable/write_buffer_manager_test.o $(LIBOBJECTS) $(TESTHARNESS) +compaction_picker_test: $(OBJ_DIR)/db/compaction/compaction_picker_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) +version_builder_test: $(OBJ_DIR)/db/version_builder_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) +file_indexer_test: $(OBJ_DIR)/db/file_indexer_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -compaction_picker_test: db/compaction/compaction_picker_test.o $(LIBOBJECTS) $(TESTHARNESS) +reduce_levels_test: $(OBJ_DIR)/tools/reduce_levels_test.o $(TOOLS_LIBRARY) $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -version_builder_test: db/version_builder_test.o $(LIBOBJECTS) $(TESTHARNESS) +write_batch_test: $(OBJ_DIR)/db/write_batch_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -file_indexer_test: db/file_indexer_test.o $(LIBOBJECTS) $(TESTHARNESS) +write_controller_test: $(OBJ_DIR)/db/write_controller_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -reduce_levels_test: tools/reduce_levels_test.o $(LIBOBJECTS) $(TESTHARNESS) +merge_helper_test: $(OBJ_DIR)/db/merge_helper_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) +memory_test: $(OBJ_DIR)/utilities/memory/memory_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -write_controller_test: db/write_controller_test.o $(LIBOBJECTS) $(TESTHARNESS) +merge_test: $(OBJ_DIR)/db/merge_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -merge_helper_test: db/merge_helper_test.o $(LIBOBJECTS) $(TESTHARNESS) +merger_test: $(OBJ_DIR)/table/merger_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -memory_test: utilities/memory/memory_test.o $(LIBOBJECTS) $(TESTHARNESS) +util_merge_operators_test: $(OBJ_DIR)/utilities/util_merge_operators_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -merge_test: db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS) +options_file_test: $(OBJ_DIR)/db/options_file_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -merger_test: table/merger_test.o $(LIBOBJECTS) $(TESTHARNESS) +deletefile_test: $(OBJ_DIR)/db/deletefile_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -util_merge_operators_test: utilities/util_merge_operators_test.o $(LIBOBJECTS) $(TESTHARNESS) +obsolete_files_test: $(OBJ_DIR)/db/obsolete_files_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -options_file_test: db/options_file_test.o $(LIBOBJECTS) $(TESTHARNESS) +rocksdb_dump: $(OBJ_DIR)/tools/dump/rocksdb_dump.o $(LIBRARY) 
$(AM_LINK) -deletefile_test: db/deletefile_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +rocksdb_undump: $(OBJ_DIR)/tools/dump/rocksdb_undump.o $(LIBRARY) $(AM_LINK) -obsolete_files_test: db/obsolete_files_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +cuckoo_table_builder_test: $(OBJ_DIR)/table/cuckoo/cuckoo_table_builder_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -rocksdb_dump: tools/dump/rocksdb_dump.o $(LIBOBJECTS) +cuckoo_table_reader_test: $(OBJ_DIR)/table/cuckoo/cuckoo_table_reader_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -rocksdb_undump: tools/dump/rocksdb_undump.o $(LIBOBJECTS) +cuckoo_table_db_test: $(OBJ_DIR)/db/cuckoo_table_db_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cuckoo_table_builder_test: table/cuckoo/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS) +listener_test: $(OBJ_DIR)/db/listener_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cuckoo_table_reader_test: table/cuckoo/cuckoo_table_reader_test.o $(LIBOBJECTS) $(TESTHARNESS) +thread_list_test: $(OBJ_DIR)/util/thread_list_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -cuckoo_table_db_test: db/cuckoo_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) +compact_files_test: $(OBJ_DIR)/db/compact_files_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -listener_test: db/listener_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +configurable_test: options/configurable_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -thread_list_test: util/thread_list_test.o $(LIBOBJECTS) $(TESTHARNESS) +customizable_test: options/customizable_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -compact_files_test: db/compact_files_test.o $(LIBOBJECTS) $(TESTHARNESS) +options_test: $(OBJ_DIR)/options/options_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -options_test: options/options_test.o $(LIBOBJECTS) $(TESTHARNESS) +options_settable_test: $(OBJ_DIR)/options/options_settable_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -options_settable_test: options/options_settable_test.o $(LIBOBJECTS) $(TESTHARNESS) +options_util_test: $(OBJ_DIR)/utilities/options/options_util_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -options_util_test: utilities/options/options_util_test.o $(LIBOBJECTS) $(TESTHARNESS) +db_bench_tool_test: $(OBJ_DIR)/tools/db_bench_tool_test.o $(BENCH_OBJECTS) $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_bench_tool_test: tools/db_bench_tool_test.o $(BENCHTOOLOBJECTS) $(TESTHARNESS) +trace_analyzer_test: $(OBJ_DIR)/tools/trace_analyzer_test.o $(ANALYZE_OBJECTS) $(TOOLS_LIBRARY) $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -trace_analyzer_test: tools/trace_analyzer_test.o $(LIBOBJECTS) $(ANALYZETOOLOBJECTS) $(TESTHARNESS) +event_logger_test: $(OBJ_DIR)/logging/event_logger_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -event_logger_test: logging/event_logger_test.o $(LIBOBJECTS) $(TESTHARNESS) +timer_queue_test: $(OBJ_DIR)/util/timer_queue_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -timer_queue_test: util/timer_queue_test.o $(LIBOBJECTS) $(TESTHARNESS) +sst_dump_test: $(OBJ_DIR)/tools/sst_dump_test.o $(TOOLS_LIBRARY) $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -sst_dump_test: tools/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS) +optimistic_transaction_test: $(OBJ_DIR)/utilities/transactions/optimistic_transaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -optimistic_transaction_test: utilities/transactions/optimistic_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS) +mock_env_test : $(OBJ_DIR)/env/mock_env_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -mock_env_test : env/mock_env_test.o $(LIBOBJECTS) $(TESTHARNESS) 
+manual_compaction_test: $(OBJ_DIR)/db/manual_compaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -manual_compaction_test: db/manual_compaction_test.o $(LIBOBJECTS) $(TESTHARNESS) +filelock_test: $(OBJ_DIR)/util/filelock_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -filelock_test: util/filelock_test.o $(LIBOBJECTS) $(TESTHARNESS) +auto_roll_logger_test: $(OBJ_DIR)/logging/auto_roll_logger_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -auto_roll_logger_test: logging/auto_roll_logger_test.o $(LIBOBJECTS) $(TESTHARNESS) +env_logger_test: $(OBJ_DIR)/logging/env_logger_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -env_logger_test: logging/env_logger_test.o $(LIBOBJECTS) $(TESTHARNESS) +memtable_list_test: $(OBJ_DIR)/db/memtable_list_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -memtable_list_test: db/memtable_list_test.o $(LIBOBJECTS) $(TESTHARNESS) +write_callback_test: $(OBJ_DIR)/db/write_callback_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -write_callback_test: db/write_callback_test.o $(LIBOBJECTS) $(TESTHARNESS) +heap_test: $(OBJ_DIR)/util/heap_test.o $(GTEST) $(AM_LINK) -heap_test: util/heap_test.o $(GTEST) +point_lock_manager_test: utilities/transactions/lock/point/point_lock_manager_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -transaction_test: utilities/transactions/transaction_test.o $(LIBOBJECTS) $(TESTHARNESS) +transaction_test: $(OBJ_DIR)/utilities/transactions/transaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -write_prepared_transaction_test: utilities/transactions/write_prepared_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS) +write_prepared_transaction_test: $(OBJ_DIR)/utilities/transactions/write_prepared_transaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -write_unprepared_transaction_test: utilities/transactions/write_unprepared_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS) +write_unprepared_transaction_test: $(OBJ_DIR)/utilities/transactions/write_unprepared_transaction_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -sst_dump: tools/sst_dump.o $(LIBOBJECTS) +sst_dump: $(OBJ_DIR)/tools/sst_dump.o $(TOOLS_LIBRARY) $(LIBRARY) $(AM_LINK) -blob_dump: tools/blob_dump.o $(LIBOBJECTS) +blob_dump: $(OBJ_DIR)/tools/blob_dump.o $(TOOLS_LIBRARY) $(LIBRARY) $(AM_LINK) -repair_test: db/repair_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +repair_test: $(OBJ_DIR)/db/repair_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -ldb_cmd_test: tools/ldb_cmd_test.o $(LIBOBJECTS) $(TESTHARNESS) +ldb_cmd_test: $(OBJ_DIR)/tools/ldb_cmd_test.o $(TOOLS_LIBRARY) $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -ldb: tools/ldb.o $(LIBOBJECTS) +ldb: $(OBJ_DIR)/tools/ldb.o $(TOOLS_LIBRARY) $(LIBRARY) $(AM_LINK) -iostats_context_test: monitoring/iostats_context_test.o $(LIBOBJECTS) $(TESTHARNESS) +iostats_context_test: $(OBJ_DIR)/monitoring/iostats_context_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_V_CCLD)$(CXX) $^ $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) -persistent_cache_test: utilities/persistent_cache/persistent_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +persistent_cache_test: $(OBJ_DIR)/utilities/persistent_cache/persistent_cache_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +statistics_test: $(OBJ_DIR)/monitoring/statistics_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +stats_history_test: $(OBJ_DIR)/monitoring/stats_history_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +lru_cache_test: $(OBJ_DIR)/cache/lru_cache_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +range_del_aggregator_test: $(OBJ_DIR)/db/range_del_aggregator_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + 
+range_del_aggregator_bench: $(OBJ_DIR)/db/range_del_aggregator_bench.o $(LIBRARY) + $(AM_LINK) + +blob_db_test: $(OBJ_DIR)/utilities/blob_db/blob_db_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +repeatable_thread_test: $(OBJ_DIR)/util/repeatable_thread_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +range_locking_test: utilities/transactions/lock/range/range_locking_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +range_tombstone_fragmenter_test: $(OBJ_DIR)/db/range_tombstone_fragmenter_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +sst_file_reader_test: $(OBJ_DIR)/table/sst_file_reader_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_secondary_test: $(OBJ_DIR)/db/db_secondary_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +block_cache_tracer_test: $(OBJ_DIR)/trace_replay/block_cache_tracer_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +block_cache_trace_analyzer_test: $(OBJ_DIR)/tools/block_cache_analyzer/block_cache_trace_analyzer_test.o $(OBJ_DIR)/tools/block_cache_analyzer/block_cache_trace_analyzer.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -statistics_test: monitoring/statistics_test.o $(LIBOBJECTS) $(TESTHARNESS) +defer_test: $(OBJ_DIR)/util/defer_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -stats_history_test: monitoring/stats_history_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +blob_counting_iterator_test: $(OBJ_DIR)/db/blob/blob_counting_iterator_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -lru_cache_test: cache/lru_cache_test.o $(LIBOBJECTS) $(TESTHARNESS) +blob_file_addition_test: $(OBJ_DIR)/db/blob/blob_file_addition_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -range_del_aggregator_test: db/range_del_aggregator_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +blob_file_builder_test: $(OBJ_DIR)/db/blob/blob_file_builder_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -range_del_aggregator_bench: db/range_del_aggregator_bench.o $(LIBOBJECTS) $(TESTUTIL) +blob_file_cache_test: $(OBJ_DIR)/db/blob/blob_file_cache_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -blob_db_test: utilities/blob_db/blob_db_test.o $(LIBOBJECTS) $(TESTHARNESS) +blob_file_garbage_test: $(OBJ_DIR)/db/blob/blob_file_garbage_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -repeatable_thread_test: util/repeatable_thread_test.o $(LIBOBJECTS) $(TESTHARNESS) +blob_file_reader_test: $(OBJ_DIR)/db/blob/blob_file_reader_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -range_tombstone_fragmenter_test: db/range_tombstone_fragmenter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +blob_garbage_meter_test: $(OBJ_DIR)/db/blob/blob_garbage_meter_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -sst_file_reader_test: table/sst_file_reader_test.o $(LIBOBJECTS) $(TESTHARNESS) +timer_test: $(OBJ_DIR)/util/timer_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -db_secondary_test: db/db_impl/db_secondary_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) +periodic_work_scheduler_test: $(OBJ_DIR)/db/periodic_work_scheduler_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -block_cache_tracer_test: trace_replay/block_cache_tracer_test.o trace_replay/block_cache_tracer.o $(LIBOBJECTS) $(TESTHARNESS) +testutil_test: $(OBJ_DIR)/test_util/testutil_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -block_cache_trace_analyzer_test: tools/block_cache_analyzer/block_cache_trace_analyzer_test.o tools/block_cache_analyzer/block_cache_trace_analyzer.o $(LIBOBJECTS) $(TESTHARNESS) +io_tracer_test: $(OBJ_DIR)/trace_replay/io_tracer_test.o $(OBJ_DIR)/trace_replay/io_tracer.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) -defer_test: 
util/defer_test.o $(LIBOBJECTS) $(TESTHARNESS) +prefetch_test: $(OBJ_DIR)/file/prefetch_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) +io_tracer_parser_test: $(OBJ_DIR)/tools/io_tracer_parser_test.o $(OBJ_DIR)/tools/io_tracer_parser_tool.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +io_tracer_parser: $(OBJ_DIR)/tools/io_tracer_parser.o $(TOOLS_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_blob_corruption_test: $(OBJ_DIR)/db/blob/db_blob_corruption_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +db_write_buffer_manager_test: $(OBJ_DIR)/db/db_write_buffer_manager_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +clipping_iterator_test: $(OBJ_DIR)/db/compaction/clipping_iterator_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + +ribbon_bench: $(OBJ_DIR)/microbench/ribbon_bench.o $(LIBRARY) + $(AM_LINK) + +db_basic_bench: $(OBJ_DIR)/microbench/db_basic_bench.o $(LIBRARY) + $(AM_LINK) + +cache_reservation_manager_test: $(OBJ_DIR)/cache/cache_reservation_manager_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) #------------------------------------------------- # make install related stuff -INSTALL_PATH ?= /usr/local +PREFIX ?= /usr/local +LIBDIR ?= $(PREFIX)/lib +INSTALL_LIBDIR = $(DESTDIR)$(LIBDIR) uninstall: - rm -rf $(INSTALL_PATH)/include/rocksdb \ - $(INSTALL_PATH)/lib/$(LIBRARY) \ - $(INSTALL_PATH)/lib/$(SHARED4) \ - $(INSTALL_PATH)/lib/$(SHARED3) \ - $(INSTALL_PATH)/lib/$(SHARED2) \ - $(INSTALL_PATH)/lib/$(SHARED1) - -install-headers: - install -d $(INSTALL_PATH)/lib + rm -rf $(DESTDIR)$(PREFIX)/include/rocksdb \ + $(INSTALL_LIBDIR)/$(LIBRARY) \ + $(INSTALL_LIBDIR)/$(SHARED4) \ + $(INSTALL_LIBDIR)/$(SHARED3) \ + $(INSTALL_LIBDIR)/$(SHARED2) \ + $(INSTALL_LIBDIR)/$(SHARED1) \ + $(INSTALL_LIBDIR)/pkgconfig/rocksdb.pc + +install-headers: gen-pc + install -d $(INSTALL_LIBDIR) + install -d $(INSTALL_LIBDIR)/pkgconfig for header_dir in `$(FIND) "include/rocksdb" -type d`; do \ - install -d $(INSTALL_PATH)/$$header_dir; \ + install -d $(DESTDIR)/$(PREFIX)/$$header_dir; \ done for header in `$(FIND) "include/rocksdb" -type f -name *.h`; do \ - install -C -m 644 $$header $(INSTALL_PATH)/$$header; \ + install -C -m 644 $$header $(DESTDIR)/$(PREFIX)/$$header; \ + done + for header in $(ROCKSDB_PLUGIN_HEADERS); do \ + install -d $(DESTDIR)/$(PREFIX)/include/rocksdb/`dirname $$header`; \ + install -C -m 644 $$header $(DESTDIR)/$(PREFIX)/include/rocksdb/$$header; \ done + install -C -m 644 rocksdb.pc $(INSTALL_LIBDIR)/pkgconfig/rocksdb.pc install-static: install-headers $(LIBRARY) - install -C -m 755 $(LIBRARY) $(INSTALL_PATH)/lib + install -d $(INSTALL_LIBDIR) + install -C -m 755 $(LIBRARY) $(INSTALL_LIBDIR) install-shared: install-headers $(SHARED4) - install -C -m 755 $(SHARED4) $(INSTALL_PATH)/lib && \ - ln -fs $(SHARED4) $(INSTALL_PATH)/lib/$(SHARED3) && \ - ln -fs $(SHARED4) $(INSTALL_PATH)/lib/$(SHARED2) && \ - ln -fs $(SHARED4) $(INSTALL_PATH)/lib/$(SHARED1) + install -d $(INSTALL_LIBDIR) + install -C -m 755 $(SHARED4) $(INSTALL_LIBDIR) + ln -fs $(SHARED4) $(INSTALL_LIBDIR)/$(SHARED3) + ln -fs $(SHARED4) $(INSTALL_LIBDIR)/$(SHARED2) + ln -fs $(SHARED4) $(INSTALL_LIBDIR)/$(SHARED1) # install static by default + install shared if it exists install: install-static [ -e $(SHARED4) ] && $(MAKE) install-shared || : +# Generate the pkg-config file +gen-pc: + -echo 'prefix=$(PREFIX)' > rocksdb.pc + -echo 'exec_prefix=$${prefix}' >> rocksdb.pc + -echo 'includedir=$${prefix}/include' >> rocksdb.pc + -echo 'libdir=$(LIBDIR)' >> rocksdb.pc + -echo '' >> rocksdb.pc + -echo 'Name: rocksdb' >> rocksdb.pc + -echo 
'Description: An embeddable persistent key-value store for fast storage' >> rocksdb.pc + -echo Version: $(shell ./build_tools/version.sh full) >> rocksdb.pc + -echo 'Libs: -L$${libdir} $(EXEC_LDFLAGS) -lrocksdb' >> rocksdb.pc + -echo 'Libs.private: $(PLATFORM_LDFLAGS)' >> rocksdb.pc + -echo 'Cflags: -I$${includedir} $(PLATFORM_CXXFLAGS)' >> rocksdb.pc + -echo 'Requires: $(subst ",,$(ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES))' >> rocksdb.pc + #------------------------------------------------- # --------------------------------------------------------------------------- # Jni stuff # --------------------------------------------------------------------------- - JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux ifeq ($(PLATFORM), OS_SOLARIS) ARCH := $(shell isainfo -b) else ifeq ($(PLATFORM), OS_OPENBSD) - ifneq (,$(filter amd64 ppc64 ppc64le arm64 aarch64 sparc64, $(MACHINE))) + ifneq (,$(filter amd64 ppc64 ppc64le s390x arm64 aarch64 sparc64, $(MACHINE))) ARCH := 64 else ARCH := 32 @@ -1783,37 +2072,48 @@ JNI_LIBC_POSTFIX = -$(JNI_LIBC) endif -ifneq (,$(filter ppc% arm64 aarch64 sparc64, $(MACHINE))) +ifeq (,$(ROCKSDBJNILIB)) +ifneq (,$(filter ppc% s390x arm64 aarch64 sparc64, $(MACHINE))) ROCKSDBJNILIB = librocksdbjni-linux-$(MACHINE)$(JNI_LIBC_POSTFIX).so else ROCKSDBJNILIB = librocksdbjni-linux$(ARCH)$(JNI_LIBC_POSTFIX).so endif -ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar -ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar -ROCKSDB_JAVADOCS_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-javadoc.jar -ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-sources.jar +endif +ROCKSDB_JAVA_VERSION ?= $(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH) +ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar +ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar +ROCKSDB_JAVADOCS_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-javadoc.jar +ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-sources.jar SHA256_CMD = sha256sum ZLIB_VER ?= 1.2.11 ZLIB_SHA256 ?= c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1 -ZLIB_DOWNLOAD_BASE ?= http://zlib.net -BZIP2_VER ?= 1.0.6 -BZIP2_SHA256 ?= a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd -BZIP2_DOWNLOAD_BASE ?= https://downloads.sourceforge.net/project/bzip2 -SNAPPY_VER ?= 1.1.7 -SNAPPY_SHA256 ?= 3dfa02e873ff51a11ee02b9ca391807f0c8ea0529a4924afa645fbf97163f9d4 +ZLIB_DOWNLOAD_BASE ?= https://zlib.net/fossils +BZIP2_VER ?= 1.0.8 +BZIP2_SHA256 ?= ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269 +BZIP2_DOWNLOAD_BASE ?= http://sourceware.org/pub/bzip2 +SNAPPY_VER ?= 1.1.8 +SNAPPY_SHA256 ?= 16b677f07832a612b0836178db7f374e414f94657c138e6993cbfc5dcc58651f SNAPPY_DOWNLOAD_BASE ?= https://github.com/google/snappy/archive -LZ4_VER ?= 1.9.2 -LZ4_SHA256 ?= 658ba6191fa44c92280d4aa2c271b0f4fbc0e34d249578dd05e50e76d0e5efcc +LZ4_VER ?= 1.9.3 +LZ4_SHA256 ?= 030644df4611007ff7dc962d981f390361e6c97a34e5cbc393ddfbe019ffe2c1 LZ4_DOWNLOAD_BASE ?= https://github.com/lz4/lz4/archive -ZSTD_VER ?= 1.4.4 -ZSTD_SHA256 ?= a364f5162c7d1a455cc915e8e3cf5f4bd8b75d09bc0f53965b0c9ca1383c52c8 +ZSTD_VER ?= 1.4.9 +ZSTD_SHA256 ?= acf714d98e3db7b876e5b540cbf6dee298f60eb3c0723104f6d3f065cd60d6a8 ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 ifeq ($(PLATFORM), OS_MACOSX) +ifeq (,$(findstring 
librocksdbjni-osx,$(ROCKSDBJNILIB))) +ifeq ($(MACHINE),arm64) + ROCKSDBJNILIB = librocksdbjni-osx-arm64.jnilib +else ifeq ($(MACHINE),x86_64) + ROCKSDBJNILIB = librocksdbjni-osx-x86_64.jnilib +else ROCKSDBJNILIB = librocksdbjni-osx.jnilib - ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar +endif +endif + ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-osx.jar SHA256_CMD = openssl sha256 -r ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","") JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin @@ -1821,10 +2121,11 @@ JAVA_INCLUDE = -I/System/Library/Frameworks/JavaVM.framework/Headers/ endif endif + ifeq ($(PLATFORM), OS_FREEBSD) JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/freebsd ROCKSDBJNILIB = librocksdbjni-freebsd$(ARCH).so - ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-freebsd$(ARCH).jar + ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-freebsd$(ARCH).jar endif ifeq ($(PLATFORM), OS_SOLARIS) ROCKSDBJNILIB = librocksdbjni-solaris$(ARCH).so @@ -1839,142 +2140,186 @@ SNAPPY_MAKE_TARGET = libsnappy.la endif ifeq ($(PLATFORM), OS_OPENBSD) - JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/openbsd + JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/openbsd ROCKSDBJNILIB = librocksdbjni-openbsd$(ARCH).so - ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-openbsd$(ARCH).jar + ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-openbsd$(ARCH).jar endif -libz.a: - -rm -rf zlib-$(ZLIB_VER) -ifeq (,$(wildcard ./zlib-$(ZLIB_VER).tar.gz)) +zlib-$(ZLIB_VER).tar.gz: curl --fail --output zlib-$(ZLIB_VER).tar.gz --location ${ZLIB_DOWNLOAD_BASE}/zlib-$(ZLIB_VER).tar.gz -endif ZLIB_SHA256_ACTUAL=`$(SHA256_CMD) zlib-$(ZLIB_VER).tar.gz | cut -d ' ' -f 1`; \ if [ "$(ZLIB_SHA256)" != "$$ZLIB_SHA256_ACTUAL" ]; then \ echo zlib-$(ZLIB_VER).tar.gz checksum mismatch, expected=\"$(ZLIB_SHA256)\" actual=\"$$ZLIB_SHA256_ACTUAL\"; \ exit 1; \ fi + +libz.a: zlib-$(ZLIB_VER).tar.gz + -rm -rf zlib-$(ZLIB_VER) tar xvzf zlib-$(ZLIB_VER).tar.gz - cd zlib-$(ZLIB_VER) && CFLAGS='-fPIC ${EXTRA_CFLAGS}' LDFLAGS='${EXTRA_LDFLAGS}' ./configure --static && $(MAKE) + if [ -n"$(ARCHFLAG)" ]; then \ + cd zlib-$(ZLIB_VER) && CFLAGS='-fPIC ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' ./configure --static --archs="$(ARCHFLAG)" && $(MAKE); \ + else \ + cd zlib-$(ZLIB_VER) && CFLAGS='-fPIC ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' ./configure --static && $(MAKE); \ + fi cp zlib-$(ZLIB_VER)/libz.a . 
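The zlib hunk above, and the bzip2, snappy, lz4 and zstd hunks below, all apply the same restructuring: the download moves out of the `lib*.a` recipe into a dedicated file target for the tarball, so an already-fetched archive is treated as up to date instead of being guarded by the old `ifeq (,$(wildcard ...))` test, and the sha256 check runs immediately after the fetch. A generic sketch of the fetch-and-verify idiom, with hypothetical `foo`/`FOO_*` names standing in for the real ZLIB_*, BZIP2_*, etc. variables (recipe lines begin with a tab):

# Placeholder names; the real rules use ZLIB_*, BZIP2_*, SNAPPY_*, ...
foo-$(FOO_VER).tar.gz:
	curl --fail --output $@ --location $(FOO_DOWNLOAD_BASE)/foo-$(FOO_VER).tar.gz
	FOO_SHA256_ACTUAL=`$(SHA256_CMD) $@ | cut -d ' ' -f 1`; \
	if [ "$(FOO_SHA256)" != "$$FOO_SHA256_ACTUAL" ]; then \
		echo $@ checksum mismatch, expected=$(FOO_SHA256) actual=$$FOO_SHA256_ACTUAL; \
		exit 1; \
	fi

# The build rule only depends on the (verified) tarball file target:
libfoo.a: foo-$(FOO_VER).tar.gz
	-rm -rf foo-$(FOO_VER)
	tar xvzf $<
	cd foo-$(FOO_VER) && $(MAKE) CFLAGS='-fPIC $(EXTRA_CFLAGS)'
	cp foo-$(FOO_VER)/libfoo.a .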
-libbz2.a: - -rm -rf bzip2-$(BZIP2_VER) -ifeq (,$(wildcard ./bzip2-$(BZIP2_VER).tar.gz)) +bzip2-$(BZIP2_VER).tar.gz: curl --fail --output bzip2-$(BZIP2_VER).tar.gz --location ${CURL_SSL_OPTS} ${BZIP2_DOWNLOAD_BASE}/bzip2-$(BZIP2_VER).tar.gz -endif BZIP2_SHA256_ACTUAL=`$(SHA256_CMD) bzip2-$(BZIP2_VER).tar.gz | cut -d ' ' -f 1`; \ if [ "$(BZIP2_SHA256)" != "$$BZIP2_SHA256_ACTUAL" ]; then \ echo bzip2-$(BZIP2_VER).tar.gz checksum mismatch, expected=\"$(BZIP2_SHA256)\" actual=\"$$BZIP2_SHA256_ACTUAL\"; \ exit 1; \ fi + +libbz2.a: bzip2-$(BZIP2_VER).tar.gz + -rm -rf bzip2-$(BZIP2_VER) tar xvzf bzip2-$(BZIP2_VER).tar.gz - cd bzip2-$(BZIP2_VER) && $(MAKE) CFLAGS='-fPIC -O2 -g -D_FILE_OFFSET_BITS=64 ${EXTRA_CFLAGS}' AR='ar ${EXTRA_ARFLAGS}' + cd bzip2-$(BZIP2_VER) && $(MAKE) CFLAGS='-fPIC -O2 -g -D_FILE_OFFSET_BITS=64 $(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' AR='ar ${EXTRA_ARFLAGS}' libbz2.a cp bzip2-$(BZIP2_VER)/libbz2.a . -libsnappy.a: - -rm -rf snappy-$(SNAPPY_VER) -ifeq (,$(wildcard ./snappy-$(SNAPPY_VER).tar.gz)) +snappy-$(SNAPPY_VER).tar.gz: curl --fail --output snappy-$(SNAPPY_VER).tar.gz --location ${CURL_SSL_OPTS} ${SNAPPY_DOWNLOAD_BASE}/$(SNAPPY_VER).tar.gz -endif SNAPPY_SHA256_ACTUAL=`$(SHA256_CMD) snappy-$(SNAPPY_VER).tar.gz | cut -d ' ' -f 1`; \ if [ "$(SNAPPY_SHA256)" != "$$SNAPPY_SHA256_ACTUAL" ]; then \ echo snappy-$(SNAPPY_VER).tar.gz checksum mismatch, expected=\"$(SNAPPY_SHA256)\" actual=\"$$SNAPPY_SHA256_ACTUAL\"; \ exit 1; \ fi + +libsnappy.a: snappy-$(SNAPPY_VER).tar.gz + -rm -rf snappy-$(SNAPPY_VER) tar xvzf snappy-$(SNAPPY_VER).tar.gz mkdir snappy-$(SNAPPY_VER)/build - cd snappy-$(SNAPPY_VER)/build && CFLAGS='${EXTRA_CFLAGS}' CXXFLAGS='${EXTRA_CXXFLAGS}' LDFLAGS='${EXTRA_LDFLAGS}' cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON .. && $(MAKE) ${SNAPPY_MAKE_TARGET} + cd snappy-$(SNAPPY_VER)/build && CFLAGS='$(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' CXXFLAGS='$(ARCHFLAG) ${JAVA_STATIC_DEPS_CXXFLAGS} ${EXTRA_CXXFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON ${PLATFORM_CMAKE_FLAGS} .. && $(MAKE) ${SNAPPY_MAKE_TARGET} cp snappy-$(SNAPPY_VER)/build/libsnappy.a . -liblz4.a: - -rm -rf lz4-$(LZ4_VER) -ifeq (,$(wildcard ./lz4-$(LZ4_VER).tar.gz)) +lz4-$(LZ4_VER).tar.gz: curl --fail --output lz4-$(LZ4_VER).tar.gz --location ${CURL_SSL_OPTS} ${LZ4_DOWNLOAD_BASE}/v$(LZ4_VER).tar.gz -endif LZ4_SHA256_ACTUAL=`$(SHA256_CMD) lz4-$(LZ4_VER).tar.gz | cut -d ' ' -f 1`; \ if [ "$(LZ4_SHA256)" != "$$LZ4_SHA256_ACTUAL" ]; then \ echo lz4-$(LZ4_VER).tar.gz checksum mismatch, expected=\"$(LZ4_SHA256)\" actual=\"$$LZ4_SHA256_ACTUAL\"; \ exit 1; \ fi + +liblz4.a: lz4-$(LZ4_VER).tar.gz + -rm -rf lz4-$(LZ4_VER) tar xvzf lz4-$(LZ4_VER).tar.gz - cd lz4-$(LZ4_VER)/lib && $(MAKE) CFLAGS='-fPIC -O2 ${EXTRA_CFLAGS}' all + cd lz4-$(LZ4_VER)/lib && $(MAKE) CFLAGS='-fPIC -O2 $(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' all cp lz4-$(LZ4_VER)/lib/liblz4.a . 
-libzstd.a: - -rm -rf zstd-$(ZSTD_VER) -ifeq (,$(wildcard ./zstd-$(ZSTD_VER).tar.gz)) +zstd-$(ZSTD_VER).tar.gz: curl --fail --output zstd-$(ZSTD_VER).tar.gz --location ${CURL_SSL_OPTS} ${ZSTD_DOWNLOAD_BASE}/v$(ZSTD_VER).tar.gz -endif ZSTD_SHA256_ACTUAL=`$(SHA256_CMD) zstd-$(ZSTD_VER).tar.gz | cut -d ' ' -f 1`; \ if [ "$(ZSTD_SHA256)" != "$$ZSTD_SHA256_ACTUAL" ]; then \ echo zstd-$(ZSTD_VER).tar.gz checksum mismatch, expected=\"$(ZSTD_SHA256)\" actual=\"$$ZSTD_SHA256_ACTUAL\"; \ exit 1; \ fi + +libzstd.a: zstd-$(ZSTD_VER).tar.gz + -rm -rf zstd-$(ZSTD_VER) tar xvzf zstd-$(ZSTD_VER).tar.gz - cd zstd-$(ZSTD_VER)/lib && DESTDIR=. PREFIX= $(MAKE) CFLAGS='-fPIC -O2 ${EXTRA_CFLAGS}' install + cd zstd-$(ZSTD_VER)/lib && DESTDIR=. PREFIX= $(MAKE) CFLAGS='-fPIC -O2 $(ARCHFLAG) ${JAVA_STATIC_DEPS_CCFLAGS} ${EXTRA_CFLAGS}' LDFLAGS='${JAVA_STATIC_DEPS_LDFLAGS} ${EXTRA_LDFLAGS}' libzstd.a cp zstd-$(ZSTD_VER)/lib/libzstd.a . -# A version of each $(LIBOBJECTS) compiled with -fPIC and a fixed set of static compression libraries -java_static_libobjects = $(patsubst %,jls/%,$(LIB_CC_OBJECTS)) -CLEAN_FILES += jls -java_static_all_libobjects = $(java_static_libobjects) - +# A version of each $(LIB_OBJECTS) compiled with -fPIC and a fixed set of static compression libraries ifneq ($(ROCKSDB_JAVA_NO_COMPRESSION), 1) JAVA_COMPRESSIONS = libz.a libbz2.a libsnappy.a liblz4.a libzstd.a endif JAVA_STATIC_FLAGS = -DZLIB -DBZIP2 -DSNAPPY -DLZ4 -DZSTD -JAVA_STATIC_INCLUDES = -I./zlib-$(ZLIB_VER) -I./bzip2-$(BZIP2_VER) -I./snappy-$(SNAPPY_VER) -I./lz4-$(LZ4_VER)/lib -I./zstd-$(ZSTD_VER)/lib/include +JAVA_STATIC_INCLUDES = -I./zlib-$(ZLIB_VER) -I./bzip2-$(BZIP2_VER) -I./snappy-$(SNAPPY_VER) -I./snappy-$(SNAPPY_VER)/build -I./lz4-$(LZ4_VER)/lib -I./zstd-$(ZSTD_VER)/lib -I./zstd-$(ZSTD_VER)/lib/dictBuilder -ifeq ($(HAVE_POWER8),1) -JAVA_STATIC_C_LIBOBJECTS = $(patsubst %.c.o,jls/%.c.o,$(LIB_SOURCES_C:.c=.o)) -JAVA_STATIC_ASM_LIBOBJECTS = $(patsubst %.S.o,jls/%.S.o,$(LIB_SOURCES_ASM:.S=.o)) - -java_static_ppc_libobjects = $(JAVA_STATIC_C_LIBOBJECTS) $(JAVA_STATIC_ASM_LIBOBJECTS) - -jls/util/crc32c_ppc.o: util/crc32c_ppc.c - $(AM_V_CC)$(CC) $(CFLAGS) $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) -c $< -o $@ - -jls/util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S - $(AM_V_CC)$(CC) $(CFLAGS) $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) -c $< -o $@ - -java_static_all_libobjects += $(java_static_ppc_libobjects) +ifneq ($(findstring rocksdbjavastatic, $(filter-out rocksdbjavastatic_deps, $(MAKECMDGOALS))),) +CXXFLAGS += $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) +CFLAGS += $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) +endif +rocksdbjavastatic: +ifeq ($(JAVA_HOME),) + $(error JAVA_HOME is not set) +endif + $(MAKE) rocksdbjavastatic_deps + $(MAKE) rocksdbjavastatic_libobjects + $(MAKE) rocksdbjavastatic_javalib + $(MAKE) rocksdbjava_jar + +rocksdbjavastaticosx: rocksdbjavastaticosx_archs + cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) librocksdbjni-osx-x86_64.jnilib librocksdbjni-osx-arm64.jnilib + cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class + openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 + +rocksdbjavastaticosx_ub: rocksdbjavastaticosx_archs + cd java/target; lipo -create -output librocksdbjni-osx.jnilib librocksdbjni-osx-x86_64.jnilib librocksdbjni-osx-arm64.jnilib + cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md + cd java/target; $(JAR_CMD) -uf 
$(ROCKSDB_JAR) librocksdbjni-osx.jnilib + cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class + openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 + +rocksdbjavastaticosx_archs: + $(MAKE) rocksdbjavastaticosx_arch_x86_64 + $(MAKE) rocksdbjavastaticosx_arch_arm64 + +rocksdbjavastaticosx_arch_%: +ifeq ($(JAVA_HOME),) + $(error JAVA_HOME is not set) +endif + $(MAKE) clean-ext-libraries-bin + $(MAKE) clean-rocks + ARCHFLAG="-arch $*" $(MAKE) rocksdbjavastatic_deps + ARCHFLAG="-arch $*" $(MAKE) rocksdbjavastatic_libobjects + ARCHFLAG="-arch $*" ROCKSDBJNILIB="librocksdbjni-osx-$*.jnilib" $(MAKE) rocksdbjavastatic_javalib + +ifeq ($(JAR_CMD),) +ifneq ($(JAVA_HOME),) +JAR_CMD := $(JAVA_HOME)/bin/jar +else +JAR_CMD := jar endif - -$(java_static_libobjects): jls/%.o: %.cc $(JAVA_COMPRESSIONS) - $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(JAVA_STATIC_FLAGS) $(JAVA_STATIC_INCLUDES) -fPIC -c $< -o $@ $(COVERAGEFLAGS) - -rocksdbjavastatic: $(java_static_all_libobjects) - cd java;$(MAKE) javalib; - rm -f ./java/target/$(ROCKSDBJNILIB) +endif +rocksdbjavastatic_javalib: + cd java; SHA256_CMD='$(SHA256_CMD)' $(MAKE) javalib + rm -f java/target/$(ROCKSDBJNILIB) $(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC \ -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) \ - $(java_static_all_libobjects) $(COVERAGEFLAGS) \ + $(LIB_OBJECTS) $(COVERAGEFLAGS) \ $(JAVA_COMPRESSIONS) $(JAVA_STATIC_LDFLAGS) cd java/target;if [ "$(DEBUG_LEVEL)" == "0" ]; then \ strip $(STRIPFLAGS) $(ROCKSDBJNILIB); \ fi - cd java;jar -cf target/$(ROCKSDB_JAR) HISTORY*.md - cd java/target;jar -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB) - cd java/target/classes;jar -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class - cd java/target/apidocs;jar -cf ../$(ROCKSDB_JAVADOCS_JAR) * - cd java/src/main/java;jar -cf ../../../target/$(ROCKSDB_SOURCES_JAR) org -rocksdbjavastaticrelease: rocksdbjavastatic +rocksdbjava_jar: + cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB) + cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class + openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 + +rocksdbjava_javadocs_jar: + cd java/target/apidocs; $(JAR_CMD) -cf ../$(ROCKSDB_JAVADOCS_JAR) * + openssl sha1 java/target/$(ROCKSDB_JAVADOCS_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAVADOCS_JAR).sha1 + +rocksdbjava_sources_jar: + cd java/src/main/java; $(JAR_CMD) -cf ../../../target/$(ROCKSDB_SOURCES_JAR) org + openssl sha1 java/target/$(ROCKSDB_SOURCES_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_SOURCES_JAR).sha1 + +rocksdbjavastatic_deps: $(JAVA_COMPRESSIONS) + +rocksdbjavastatic_libobjects: $(LIB_OBJECTS) + +rocksdbjavastaticrelease: rocksdbjavastaticosx rocksdbjava_javadocs_jar rocksdbjava_sources_jar cd java/crossbuild && (vagrant destroy -f || true) && vagrant up linux32 && vagrant halt linux32 && vagrant up linux64 && vagrant halt linux64 && vagrant up linux64-musl && vagrant halt linux64-musl - cd java;jar -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java/target;jar -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib - cd java/target/classes;jar -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class - -rocksdbjavastaticreleasedocker: rocksdbjavastatic rocksdbjavastaticdockerx86 
rocksdbjavastaticdockerx86_64 rocksdbjavastaticdockerx86musl rocksdbjavastaticdockerx86_64musl - cd java;jar -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java/target;jar -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib - cd java/target/classes;jar -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class + cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib + cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class + openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 + +rocksdbjavastaticreleasedocker: rocksdbjavastaticosx rocksdbjavastaticdockerx86 rocksdbjavastaticdockerx86_64 rocksdbjavastaticdockerx86musl rocksdbjavastaticdockerx86_64musl rocksdbjava_javadocs_jar rocksdbjava_sources_jar + cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib + cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class + openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 rocksdbjavastaticdockerx86: mkdir -p java/target - docker run --rm --name rocksdb_linux_x86-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:centos6_x86-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh + docker run --rm --name rocksdb_linux_x86-be --platform linux/386 --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:centos6_x86-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh rocksdbjavastaticdockerx86_64: mkdir -p java/target @@ -1988,87 +2333,83 @@ mkdir -p java/target docker run --rm --name rocksdb_linux_arm64v8-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:centos7_arm64v8-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh +rocksdbjavastaticdockers390x: + mkdir -p java/target + docker run --rm --name rocksdb_linux_s390x-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:centos7_s390x-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh + rocksdbjavastaticdockerx86musl: mkdir -p java/target - docker run --rm --name rocksdb_linux_x86-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_x86-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh + docker run --rm --name rocksdb_linux_x86-musl-be --platform linux/386 --attach stdin --attach 
stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_x86-be /rocksdb-host/java/crossbuild/docker-build-linux-alpine.sh rocksdbjavastaticdockerx86_64musl: mkdir -p java/target - docker run --rm --name rocksdb_linux_x64-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_x64-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh + docker run --rm --name rocksdb_linux_x64-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_x64-be /rocksdb-host/java/crossbuild/docker-build-linux-alpine.sh rocksdbjavastaticdockerppc64lemusl: mkdir -p java/target - docker run --rm --name rocksdb_linux_ppc64le-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_ppc64le-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh + docker run --rm --name rocksdb_linux_ppc64le-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_ppc64le-be /rocksdb-host/java/crossbuild/docker-build-linux-alpine.sh rocksdbjavastaticdockerarm64v8musl: mkdir -p java/target - docker run --rm --name rocksdb_linux_arm64v8-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_arm64v8-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh + docker run --rm --name rocksdb_linux_arm64v8-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_arm64v8-be /rocksdb-host/java/crossbuild/docker-build-linux-alpine.sh + +rocksdbjavastaticdockers390xmusl: + mkdir -p java/target + docker run --rm --name rocksdb_linux_s390x-musl-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_s390x-be /rocksdb-host/java/crossbuild/docker-build-linux-alpine.sh rocksdbjavastaticpublish: rocksdbjavastaticrelease rocksdbjavastaticpublishcentral rocksdbjavastaticpublishdocker: rocksdbjavastaticreleasedocker rocksdbjavastaticpublishcentral -rocksdbjavastaticpublishcentral: - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom 
-Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-javadoc.jar -Dclassifier=javadoc - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-sources.jar -Dclassifier=sources - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux64.jar -Dclassifier=linux64 - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux32.jar -Dclassifier=linux32 - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux64-musl.jar -Dclassifier=linux64-musl - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux32-musl.jar -Dclassifier=linux32-musl - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-osx.jar -Dclassifier=osx - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-win64.jar -Dclassifier=win64 - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/rocksjni.pom -Dfile=java/target/rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar - -# A version of each $(LIBOBJECTS) compiled with -fPIC -ifeq ($(HAVE_POWER8),1) -JAVA_CC_OBJECTS = $(SHARED_CC_OBJECTS) -JAVA_C_OBJECTS = $(SHARED_C_OBJECTS) -JAVA_ASM_OBJECTS = $(SHARED_ASM_OBJECTS) - -JAVA_C_LIBOBJECTS = $(patsubst %.c.o,jl/%.c.o,$(JAVA_C_OBJECTS)) -JAVA_ASM_LIBOBJECTS = $(patsubst %.S.o,jl/%.S.o,$(JAVA_ASM_OBJECTS)) -endif - -java_libobjects = $(patsubst %,jl/%,$(LIB_CC_OBJECTS)) -CLEAN_FILES += jl -java_all_libobjects = $(java_libobjects) +ROCKSDB_JAVA_RELEASE_CLASSIFIERS = javadoc sources linux64 linux32 linux64-musl linux32-musl osx win64 -ifeq ($(HAVE_POWER8),1) -java_ppc_libobjects = $(JAVA_C_LIBOBJECTS) $(JAVA_ASM_LIBOBJECTS) +rocksdbjavastaticpublishcentral: rocksdbjavageneratepom + mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar 
-Dclassifier=$(classifier);) + +rocksdbjavageneratepom: + cd java;cat pom.xml.template | sed 's/\$${ROCKSDB_JAVA_VERSION}/$(ROCKSDB_JAVA_VERSION)/' > pom.xml + +rocksdbjavastaticnexusbundlejar: rocksdbjavageneratepom + openssl sha1 -r java/pom.xml | awk '{ print $$1 }' > java/target/pom.xml.sha1 + openssl sha1 -r java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar | awk '{ print $$1 }' > java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), openssl sha1 -r java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar | awk '{ print $$1 }' > java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1;) + gpg --yes --output java/target/pom.xml.asc -ab java/pom.xml + gpg --yes -ab java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), gpg --yes -ab java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar;) + $(JAR_CMD) cvf java/target/nexus-bundle-rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java pom.xml -C java/target pom.xml.sha1 -C java/target pom.xml.asc -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar.asc + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), $(JAR_CMD) uf java/target/nexus-bundle-rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1 -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.asc;) -jl/crc32c_ppc.o: util/crc32c_ppc.c - $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ -jl/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S - $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ -java_all_libobjects += $(java_ppc_libobjects) -endif +# A version of each $(LIBOBJECTS) compiled with -fPIC -$(java_libobjects): jl/%.o: %.cc +jl/%.o: %.cc $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -fPIC -c $< -o $@ $(COVERAGEFLAGS) - - -rocksdbjava: $(java_all_libobjects) - $(AM_V_GEN)cd java;$(MAKE) javalib; +rocksdbjava: $(LIB_OBJECTS) +ifeq ($(JAVA_HOME),) + $(error JAVA_HOME is not set) +endif + $(AM_V_GEN)cd java; SHA256_CMD='$(SHA256_CMD)' $(MAKE) javalib; $(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB) - $(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) $(java_all_libobjects) $(JAVA_LDFLAGS) $(COVERAGEFLAGS) - $(AM_V_at)cd java;jar -cf target/$(ROCKSDB_JAR) HISTORY*.md - $(AM_V_at)cd java/target;jar -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB) - $(AM_V_at)cd java/target/classes;jar -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class + $(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. 
$(JAVA_INCLUDE) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS) + $(AM_V_at)cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md + $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB) + $(AM_V_at)cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class + $(AM_V_at)openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 jclean: cd java;$(MAKE) clean; jtest_compile: rocksdbjava - cd java;$(MAKE) java_test + cd java; SHA256_CMD='$(SHA256_CMD)' $(MAKE) java_test jtest_run: cd java;$(MAKE) run_test jtest: rocksdbjava - cd java;$(MAKE) sample;$(MAKE) test; - python tools/check_all_python.py # TODO peterd: find a better place for this check in CI targets + cd java;$(MAKE) sample; SHA256_CMD='$(SHA256_CMD)' $(MAKE) test; + $(PYTHON) tools/check_all_python.py # TODO peterd: find a better place for this check in CI targets jdb_bench: cd java;$(MAKE) db_bench; @@ -2107,30 +2448,32 @@ else ifeq ($(HAVE_POWER8),1) -util/crc32c_ppc.o: util/crc32c_ppc.c +$(OBJ_DIR)/util/crc32c_ppc.o: util/crc32c_ppc.c $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ -util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S +$(OBJ_DIR)/util/crc32c_ppc_asm.o: util/crc32c_ppc_asm.S $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ endif -.cc.o: - $(AM_V_CC)$(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) +$(OBJ_DIR)/%.o: %.cc + $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) -.cpp.o: - $(AM_V_CC)$(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) +$(OBJ_DIR)/%.o: %.cpp + $(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS) -.c.o: +$(OBJ_DIR)/%.o: %.c $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@ endif + # --------------------------------------------------------------------------- # Source files dependencies detection # --------------------------------------------------------------------------- - -all_sources = $(LIB_SOURCES) $(MAIN_SOURCES) $(MOCK_LIB_SOURCES) $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(TEST_LIB_SOURCES) $(ANALYZER_LIB_SOURCES) $(STRESS_LIB_SOURCES) -DEPFILES = $(all_sources:.cc=.cc.d) - +# If skip dependencies is ON, skip including the dep files +ifneq ($(SKIP_DEPENDS), 1) +DEPFILES = $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, $(ALL_SOURCES)) +DEPFILES+ = $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C) $(TEST_MAIN_SOURCES_C)) ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1) - DEPFILES += $(FOLLY_SOURCES:.cpp=.cpp.d) + DEPFILES +=$(patsubst %.cpp, $(OBJ_DIR)/%.cpp.d, $(FOLLY_SOURCES)) +endif endif # Add proper dependency support so changing a .h file forces a .cc file to @@ -2138,23 +2481,25 @@ # The .d file indicates .cc file's dependencies on .h files. We generate such # dependency by g++'s -MM option, whose output is a make dependency rule. 
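The rules in the next hunk implement exactly this scheme: objects and .d fragments both move under $(OBJ_DIR), and each fragment is produced by the compiler's -MM preprocessing pass, which emits a ready-made make rule naming every header the source includes. A compact, self-contained sketch with placeholder source names; the include guard at the end is a simplified stand-in for the MAKECMDGOALS filter the diff adds near the end of the Makefile, adjusted here so that a bare `make` (empty MAKECMDGOALS) still loads the fragments:

OBJ_DIR := build
SRCS    := a.cc b.cc
OBJS    := $(patsubst %.cc,$(OBJ_DIR)/%.o,$(SRCS))
DEPS    := $(patsubst %.cc,$(OBJ_DIR)/%.cc.d,$(SRCS))

all: $(OBJS)

$(OBJ_DIR)/%.o: %.cc
	mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -c $< -o $@

# -MM prints "a.o: a.cc a.h ..."; the -MT options retarget that rule so the
# generated fragment names both the fragment itself and the object under
# $(OBJ_DIR), making them regenerate whenever a listed header changes.
$(OBJ_DIR)/%.cc.d: %.cc
	@mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -MM -MT'$@' -MT'$(<:%.cc=$(OBJ_DIR)/%.o)' "$<" -o '$@'

# Only pull in the fragments when some goal actually compiles something.
GOALS := $(or $(MAKECMDGOALS),all)
ifneq ($(filter-out clean format,$(GOALS)),)
-include $(DEPS)
endif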
-%.cc.d: %.cc - @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ - -MM -MT'$@' -MT'$(<:.cc=.o)' "$<" -o '$@' - -%.cpp.d: %.cpp - @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ - -MM -MT'$@' -MT'$(<:.cpp=.o)' "$<" -o '$@' +$(OBJ_DIR)/%.cc.d: %.cc + @mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ + -MM -MT'$@' -MT'$(<:.cc=.o)' -MT'$(<:%.cc=$(OBJ_DIR)/%.o)' \ + "$<" -o '$@' + +$(OBJ_DIR)/%.cpp.d: %.cpp + @mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ + -MM -MT'$@' -MT'$(<:.cpp=.o)' -MT'$(<:%.cpp=$(OBJ_DIR)/%.o)' \ + "$<" -o '$@' ifeq ($(HAVE_POWER8),1) -DEPFILES_C = $(LIB_SOURCES_C:.c=.c.d) -DEPFILES_ASM = $(LIB_SOURCES_ASM:.S=.S.d) +DEPFILES_C = $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C)) +DEPFILES_ASM = $(patsubst %.S, $(OBJ_DIR)/%.S.d, $(LIB_SOURCES_ASM)) -%.c.d: %.c +$(OBJ_DIR)/%.c.d: %.c @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.c=.o)' "$<" -o '$@' -%.S.d: %.S +$(OBJ_DIR)/%.S.d: %.S @$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \ -MM -MT'$@' -MT'$(<:.S=.o)' "$<" -o '$@' @@ -2166,20 +2511,12 @@ depend: $(DEPFILES) endif -# if the make goal is either "clean" or "format", we shouldn't -# try to import the *.d files. -# TODO(kailiu) The unfamiliarity of Make's conditions leads to the ugly -# working solution. -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(MAKECMDGOALS),format) -ifneq ($(MAKECMDGOALS),jclean) -ifneq ($(MAKECMDGOALS),jtest) -ifneq ($(MAKECMDGOALS),package) -ifneq ($(MAKECMDGOALS),analyze) +build_subset_tests: $(ROCKSDBTESTS_SUBSET) + $(AM_V_GEN)if [ -n "$${ROCKSDBTESTS_SUBSET_TESTS_TO_FILE}" ]; then echo "$(ROCKSDBTESTS_SUBSET)" > "$${ROCKSDBTESTS_SUBSET_TESTS_TO_FILE}"; else echo "$(ROCKSDBTESTS_SUBSET)"; fi + +# Remove the rules for which dependencies should not be generated and see if any are left. +#If so, include the dependencies; if not, do not include the dependency files +ROCKS_DEP_RULES=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean jtest package analyze tags rocksdbjavastatic% unity.% unity_test, $(MAKECMDGOALS)) +ifneq ("$(ROCKS_DEP_RULES)", "") -include $(DEPFILES) endif -endif -endif -endif -endif -endif diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/PLUGINS.md mariadb-10.11.13/storage/rocksdb/rocksdb/PLUGINS.md --- mariadb-10.11.11/storage/rocksdb/rocksdb/PLUGINS.md 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/PLUGINS.md 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,4 @@ +This is the list of all known third-party plugins for RocksDB. If something is missing, please open a pull request to add it. 
+ +* [Dedupfs](https://github.com/ajkr/dedupfs): an example for plugin developers to reference +* [ZenFS](https://github.com/westerndigitalcorporation/zenfs): a file system for zoned block devices diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/README.md mariadb-10.11.13/storage/rocksdb/rocksdb/README.md --- mariadb-10.11.11/storage/rocksdb/rocksdb/README.md 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/README.md 2025-05-19 16:14:27.000000000 +0000 @@ -1,8 +1,9 @@ ## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage -[![Linux/Mac Build Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb) -[![Windows Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/master?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/master) -[![PPC64le Build Status](http://140.211.168.68:8080/buildStatus/icon?job=Rocksdb)](http://140.211.168.68:8080/job/Rocksdb) +[![CircleCI Status](https://circleci.com/gh/facebook/rocksdb.svg?style=svg)](https://circleci.com/gh/facebook/rocksdb) +[![TravisCI Status](https://api.travis-ci.com/facebook/rocksdb.svg?branch=main)](https://travis-ci.com/github/facebook/rocksdb) +[![Appveyor Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/main?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/main) +[![PPC64le Build Status](http://140-211-168-68-openstack.osuosl.org:8080/buildStatus/icon?job=rocksdb&style=plastic)](http://140-211-168-68-openstack.osuosl.org:8080/job/rocksdb) RocksDB is developed and maintained by Facebook Database Engineering Team. It is built on earlier work on [LevelDB](https://github.com/google/leveldb) by Sanjay Ghemawat (sanjay@google.com) @@ -16,7 +17,7 @@ making it especially suitable for storing multiple terabytes of data in a single database. -Start with example usage here: https://github.com/facebook/rocksdb/tree/master/examples +Start with example usage here: https://github.com/facebook/rocksdb/tree/main/examples See the [github wiki](https://github.com/facebook/rocksdb/wiki) for more explanation. @@ -24,7 +25,7 @@ rely on the details of any other header files in this package. Those internal APIs may be changed without warning. -Design discussions are conducted in https://www.facebook.com/groups/rocksdb.dev/ +Questions and discussions are welcome on the [RocksDB Developers Public](https://www.facebook.com/groups/rocksdb.dev/) Facebook group and [email list](https://groups.google.com/g/rocksdb) on Google Groups. ## License diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/TARGETS mariadb-10.11.13/storage/rocksdb/rocksdb/TARGETS --- mariadb-10.11.11/storage/rocksdb/rocksdb/TARGETS 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/TARGETS 2025-05-19 16:14:27.000000000 +0000 @@ -1,4 +1,5 @@ -# This file @generated by `python buckifier/buckify_rocksdb.py` +# This file @generated by: +#$ python3 buckifier/buckify_rocksdb.py # --> DO NOT EDIT MANUALLY <-- # This file is a Facebook-specific integration for buck builds, so can # only be validated by Facebook employees. 
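Two notes on this new TARGETS header: the file is machine-generated by buckifier/buckify_rocksdb.py, so fixes belong in the generator rather than in TARGETS itself, and the Makefile's dependency-goal filter earlier in this diff lists a check-buck-targets goal alongside clean and format. One plausible shape for such a staleness check, shown purely as an illustration (this recipe is an assumption, not the upstream rule):

# Regenerate TARGETS and fail if the checked-in copy is out of date.
check-buck-targets:
	python3 buckifier/buckify_rocksdb.py
	git diff --exit-code TARGETS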
@@ -9,7 +10,7 @@ REPO_PATH = package_name() + "/" -ROCKSDB_COMPILER_FLAGS = [ +ROCKSDB_COMPILER_FLAGS_0 = [ "-fno-builtin-memcmp", # Needed to compile in fbcode "-Wno-expansion-to-defined", @@ -24,19 +25,25 @@ ("zlib", None, "z"), ("gflags", None, "gflags"), ("lz4", None, "lz4"), - ("zstd", None), - ("tbb", None), - ("googletest", None, "gtest"), + ("zstd", None, "zstd"), ] -ROCKSDB_OS_DEPS = [ +ROCKSDB_OS_DEPS_0 = [ ( "linux", - ["third-party//numa:numa", "third-party//liburing:uring"], + [ + "third-party//numa:numa", + "third-party//liburing:uring", + "third-party//tbb:tbb", + ], + ), + ( + "macos", + ["third-party//tbb:tbb"], ), ] -ROCKSDB_OS_PREPROCESSOR_FLAGS = [ +ROCKSDB_OS_PREPROCESSOR_FLAGS_0 = [ ( "linux", [ @@ -50,17 +57,33 @@ "-DHAVE_SSE42", "-DLIBURING", "-DNUMA", + "-DROCKSDB_PLATFORM_POSIX", + "-DROCKSDB_LIB_IO_POSIX", + "-DTBB", ], ), ( "macos", - ["-DOS_MACOSX"], + [ + "-DOS_MACOSX", + "-DROCKSDB_PLATFORM_POSIX", + "-DROCKSDB_LIB_IO_POSIX", + "-DTBB", + ], + ), + ( + "windows", + [ + "-DOS_WIN", + "-DWIN32", + "-D_MBCS", + "-DWIN64", + "-DNOMINMAX", + ], ), ] ROCKSDB_PREPROCESSOR_FLAGS = [ - "-DROCKSDB_PLATFORM_POSIX", - "-DROCKSDB_LIB_IO_POSIX", "-DROCKSDB_SUPPORT_THREAD_LOCAL", # Flags to enable libs we include @@ -71,14 +94,15 @@ "-DZSTD", "-DZSTD_STATIC_LINKING_ONLY", "-DGFLAGS=gflags", - "-DTBB", # Added missing flags from output of build_detect_platform "-DROCKSDB_BACKTRACE", +] - # Directories with files for #include - "-I" + REPO_PATH + "include/", - "-I" + REPO_PATH, +# Directories with files for #include +ROCKSDB_INCLUDE_PATHS = [ + "", + "include", ] ROCKSDB_ARCH_PREPROCESSOR_FLAGS = { @@ -93,33 +117,53 @@ # -DNDEBUG is added by default in opt mode in fbcode. But adding it twice # doesn't harm and avoid forgetting to add it. -ROCKSDB_COMPILER_FLAGS += (["-DNDEBUG"] if is_opt_mode else []) +ROCKSDB_COMPILER_FLAGS = ROCKSDB_COMPILER_FLAGS_0 + (["-DNDEBUG"] if is_opt_mode else []) sanitizer = read_config("fbcode", "sanitizer") # Do not enable jemalloc if sanitizer presents. RocksDB will further detect # whether the binary is linked with jemalloc at runtime. 
-ROCKSDB_OS_PREPROCESSOR_FLAGS += ([( +ROCKSDB_OS_PREPROCESSOR_FLAGS = ROCKSDB_OS_PREPROCESSOR_FLAGS_0 + ([( "linux", ["-DROCKSDB_JEMALLOC"], )] if sanitizer == "" else []) -ROCKSDB_OS_DEPS += ([( +ROCKSDB_OS_DEPS = ROCKSDB_OS_DEPS_0 + ([( "linux", ["third-party//jemalloc:headers"], )] if sanitizer == "" else []) +ROCKSDB_LIB_DEPS = [ + ":rocksdb_lib", + ":rocksdb_test_lib", +] if not is_opt_mode else [":rocksdb_lib"] + cpp_library( name = "rocksdb_lib", srcs = [ + "cache/cache.cc", + "cache/cache_entry_roles.cc", + "cache/cache_key.cc", + "cache/cache_reservation_manager.cc", "cache/clock_cache.cc", "cache/lru_cache.cc", "cache/sharded_cache.cc", "db/arena_wrapped_db_iter.cc", + "db/blob/blob_fetcher.cc", + "db/blob/blob_file_addition.cc", + "db/blob/blob_file_builder.cc", + "db/blob/blob_file_cache.cc", + "db/blob/blob_file_garbage.cc", + "db/blob/blob_file_meta.cc", + "db/blob/blob_file_reader.cc", + "db/blob/blob_garbage_meter.cc", + "db/blob/blob_log_format.cc", + "db/blob/blob_log_sequential_reader.cc", + "db/blob/blob_log_writer.cc", + "db/blob/prefetch_buffer_collection.cc", "db/builder.cc", "db/c.cc", "db/column_family.cc", - "db/compacted_db_impl.cc", "db/compaction/compaction.cc", "db/compaction/compaction_iterator.cc", "db/compaction/compaction_job.cc", @@ -127,8 +171,10 @@ "db/compaction/compaction_picker_fifo.cc", "db/compaction/compaction_picker_level.cc", "db/compaction/compaction_picker_universal.cc", + "db/compaction/sst_partitioner.cc", "db/convenience.cc", "db/db_filesnapshot.cc", + "db/db_impl/compacted_db_impl.cc", "db/db_impl/db_impl.cc", "db/db_impl/db_impl_compaction_flush.cc", "db/db_impl/db_impl_debug.cc", @@ -159,6 +205,8 @@ "db/memtable_list.cc", "db/merge_helper.cc", "db/merge_operator.cc", + "db/output_validator.cc", + "db/periodic_work_scheduler.cc", "db/range_del_aggregator.cc", "db/range_tombstone_fragmenter.cc", "db/repair.cc", @@ -169,25 +217,32 @@ "db/trim_history_scheduler.cc", "db/version_builder.cc", "db/version_edit.cc", + "db/version_edit_handler.cc", "db/version_set.cc", + "db/wal_edit.cc", "db/wal_manager.cc", "db/write_batch.cc", "db/write_batch_base.cc", "db/write_controller.cc", "db/write_thread.cc", + "env/composite_env.cc", "env/env.cc", "env/env_chroot.cc", "env/env_encryption.cc", "env/env_hdfs.cc", "env/env_posix.cc", "env/file_system.cc", + "env/file_system_tracer.cc", "env/fs_posix.cc", + "env/fs_remap.cc", "env/io_posix.cc", "env/mock_env.cc", + "env/unique_id_gen.cc", "file/delete_scheduler.cc", "file/file_prefetch_buffer.cc", "file/file_util.cc", "file/filename.cc", + "file/line_file_reader.cc", "file/random_access_file_reader.cc", "file/read_write_util.cc", "file/readahead_raf.cc", @@ -200,6 +255,8 @@ "memory/arena.cc", "memory/concurrent_arena.cc", "memory/jemalloc_nodump_allocator.cc", + "memory/memkind_kmem_allocator.cc", + "memory/memory_allocator.cc", "memtable/alloc_tracker.cc", "memtable/hash_linklist_rep.cc", "memtable/hash_skiplist_rep.cc", @@ -221,20 +278,30 @@ "monitoring/thread_status_util.cc", "monitoring/thread_status_util_debug.cc", "options/cf_options.cc", + "options/configurable.cc", + "options/customizable.cc", "options/db_options.cc", "options/options.cc", "options/options_helper.cc", "options/options_parser.cc", - "options/options_sanity_check.cc", "port/port_posix.cc", "port/stack_trace.cc", + "port/win/env_default.cc", + "port/win/env_win.cc", + "port/win/io_win.cc", + "port/win/port_win.cc", + "port/win/win_logger.cc", + "port/win/win_thread.cc", "table/adaptive/adaptive_table_factory.cc", + 
"table/block_based/binary_search_index_reader.cc", "table/block_based/block.cc", "table/block_based/block_based_filter_block.cc", "table/block_based/block_based_table_builder.cc", "table/block_based/block_based_table_factory.cc", + "table/block_based/block_based_table_iterator.cc", "table/block_based/block_based_table_reader.cc", "table/block_based/block_builder.cc", + "table/block_based/block_prefetcher.cc", "table/block_based/block_prefix_index.cc", "table/block_based/data_block_footer.cc", "table/block_based/data_block_hash_index.cc", @@ -242,9 +309,14 @@ "table/block_based/filter_policy.cc", "table/block_based/flush_block_policy.cc", "table/block_based/full_filter_block.cc", + "table/block_based/hash_index_reader.cc", "table/block_based/index_builder.cc", + "table/block_based/index_reader_common.cc", "table/block_based/parsed_full_filter_block.cc", "table/block_based/partitioned_filter_block.cc", + "table/block_based/partitioned_index_iterator.cc", + "table/block_based/partitioned_index_reader.cc", + "table/block_based/reader_common.cc", "table/block_based/uncompression_dict_reader.cc", "table/block_fetcher.cc", "table/cuckoo/cuckoo_table_builder.cc", @@ -262,18 +334,26 @@ "table/plain/plain_table_index.cc", "table/plain/plain_table_key_coding.cc", "table/plain/plain_table_reader.cc", + "table/sst_file_dumper.cc", "table/sst_file_reader.cc", "table/sst_file_writer.cc", + "table/table_factory.cc", "table/table_properties.cc", "table/two_level_iterator.cc", + "table/unique_id.cc", "test_util/sync_point.cc", "test_util/sync_point_impl.cc", "test_util/transaction_test_util.cc", "tools/dump/db_dump_tool.cc", + "tools/io_tracer_parser_tool.cc", "tools/ldb_cmd.cc", "tools/ldb_tool.cc", "tools/sst_dump_tool.cc", "trace_replay/block_cache_tracer.cc", + "trace_replay/io_tracer.cc", + "trace_replay/trace_record.cc", + "trace_replay/trace_record_handler.cc", + "trace_replay/trace_record_result.cc", "trace_replay/trace_replay.cc", "util/build_version.cc", "util/coding.cc", @@ -282,12 +362,15 @@ "util/compression_context_cache.cc", "util/concurrent_task_limiter_impl.cc", "util/crc32c.cc", + "util/crc32c_arm64.cc", "util/dynamic_bloom.cc", "util/file_checksum_helper.cc", "util/hash.cc", "util/murmurhash.cc", "util/random.cc", "util/rate_limiter.cc", + "util/regex.cc", + "util/ribbon_config.cc", "util/slice.cc", "util/status.cc", "util/string_util.cc", @@ -301,20 +384,24 @@ "utilities/blob_db/blob_db_impl_filesnapshot.cc", "utilities/blob_db/blob_dump_tool.cc", "utilities/blob_db/blob_file.cc", - "utilities/blob_db/blob_log_format.cc", - "utilities/blob_db/blob_log_reader.cc", - "utilities/blob_db/blob_log_writer.cc", + "utilities/cache_dump_load.cc", + "utilities/cache_dump_load_impl.cc", "utilities/cassandra/cassandra_compaction_filter.cc", "utilities/cassandra/format.cc", "utilities/cassandra/merge_operator.cc", "utilities/checkpoint/checkpoint_impl.cc", + "utilities/compaction_filters.cc", "utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc", "utilities/convenience/info_log_finder.cc", "utilities/debug.cc", "utilities/env_mirror.cc", "utilities/env_timed.cc", + "utilities/fault_injection_env.cc", + "utilities/fault_injection_fs.cc", + "utilities/fault_injection_secondary_cache.cc", "utilities/leveldb_options/leveldb_options.cc", "utilities/memory/memory_util.cc", + "utilities/merge_operators.cc", "utilities/merge_operators/bytesxor.cc", "utilities/merge_operators/max.cc", "utilities/merge_operators/put.cc", @@ -334,6 +421,24 @@ "utilities/simulator_cache/sim_cache.cc", 
"utilities/table_properties_collectors/compact_on_deletion_collector.cc", "utilities/trace/file_trace_reader_writer.cc", + "utilities/trace/replayer_impl.cc", + "utilities/transactions/lock/lock_manager.cc", + "utilities/transactions/lock/point/point_lock_manager.cc", + "utilities/transactions/lock/point/point_lock_tracker.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc", + "utilities/transactions/lock/range/range_tree/lib/standalone_port.cc", + "utilities/transactions/lock/range/range_tree/lib/util/dbt.cc", + "utilities/transactions/lock/range/range_tree/lib/util/memarena.cc", + "utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc", + "utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc", "utilities/transactions/optimistic_transaction.cc", "utilities/transactions/optimistic_transaction_db_impl.cc", "utilities/transactions/pessimistic_transaction.cc", @@ -341,24 +446,356 @@ "utilities/transactions/snapshot_checker.cc", "utilities/transactions/transaction_base.cc", "utilities/transactions/transaction_db_mutex_impl.cc", - "utilities/transactions/transaction_lock_mgr.cc", "utilities/transactions/transaction_util.cc", "utilities/transactions/write_prepared_txn.cc", "utilities/transactions/write_prepared_txn_db.cc", "utilities/transactions/write_unprepared_txn.cc", "utilities/transactions/write_unprepared_txn_db.cc", "utilities/ttl/db_ttl_impl.cc", + "utilities/wal_filter.cc", "utilities/write_batch_with_index/write_batch_with_index.cc", "utilities/write_batch_with_index/write_batch_with_index_internal.cc", ], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + link_whole = False, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - deps = [], - external_deps = ROCKSDB_EXTERNAL_DEPS, + exported_deps = [], + exported_external_deps = ROCKSDB_EXTERNAL_DEPS, +) + +cpp_library( + name = "rocksdb_whole_archive_lib", + srcs = [ + "cache/cache.cc", + "cache/cache_entry_roles.cc", + "cache/cache_key.cc", + "cache/cache_reservation_manager.cc", + "cache/clock_cache.cc", + "cache/lru_cache.cc", + "cache/sharded_cache.cc", + "db/arena_wrapped_db_iter.cc", + "db/blob/blob_fetcher.cc", + "db/blob/blob_file_addition.cc", + "db/blob/blob_file_builder.cc", + "db/blob/blob_file_cache.cc", + "db/blob/blob_file_garbage.cc", + "db/blob/blob_file_meta.cc", + "db/blob/blob_file_reader.cc", + "db/blob/blob_garbage_meter.cc", + "db/blob/blob_log_format.cc", + "db/blob/blob_log_sequential_reader.cc", + "db/blob/blob_log_writer.cc", + "db/blob/prefetch_buffer_collection.cc", + "db/builder.cc", + "db/c.cc", + "db/column_family.cc", + "db/compaction/compaction.cc", + "db/compaction/compaction_iterator.cc", + "db/compaction/compaction_job.cc", + 
"db/compaction/compaction_picker.cc", + "db/compaction/compaction_picker_fifo.cc", + "db/compaction/compaction_picker_level.cc", + "db/compaction/compaction_picker_universal.cc", + "db/compaction/sst_partitioner.cc", + "db/convenience.cc", + "db/db_filesnapshot.cc", + "db/db_impl/compacted_db_impl.cc", + "db/db_impl/db_impl.cc", + "db/db_impl/db_impl_compaction_flush.cc", + "db/db_impl/db_impl_debug.cc", + "db/db_impl/db_impl_experimental.cc", + "db/db_impl/db_impl_files.cc", + "db/db_impl/db_impl_open.cc", + "db/db_impl/db_impl_readonly.cc", + "db/db_impl/db_impl_secondary.cc", + "db/db_impl/db_impl_write.cc", + "db/db_info_dumper.cc", + "db/db_iter.cc", + "db/dbformat.cc", + "db/error_handler.cc", + "db/event_helpers.cc", + "db/experimental.cc", + "db/external_sst_file_ingestion_job.cc", + "db/file_indexer.cc", + "db/flush_job.cc", + "db/flush_scheduler.cc", + "db/forward_iterator.cc", + "db/import_column_family_job.cc", + "db/internal_stats.cc", + "db/log_reader.cc", + "db/log_writer.cc", + "db/logs_with_prep_tracker.cc", + "db/malloc_stats.cc", + "db/memtable.cc", + "db/memtable_list.cc", + "db/merge_helper.cc", + "db/merge_operator.cc", + "db/output_validator.cc", + "db/periodic_work_scheduler.cc", + "db/range_del_aggregator.cc", + "db/range_tombstone_fragmenter.cc", + "db/repair.cc", + "db/snapshot_impl.cc", + "db/table_cache.cc", + "db/table_properties_collector.cc", + "db/transaction_log_impl.cc", + "db/trim_history_scheduler.cc", + "db/version_builder.cc", + "db/version_edit.cc", + "db/version_edit_handler.cc", + "db/version_set.cc", + "db/wal_edit.cc", + "db/wal_manager.cc", + "db/write_batch.cc", + "db/write_batch_base.cc", + "db/write_controller.cc", + "db/write_thread.cc", + "env/composite_env.cc", + "env/env.cc", + "env/env_chroot.cc", + "env/env_encryption.cc", + "env/env_hdfs.cc", + "env/env_posix.cc", + "env/file_system.cc", + "env/file_system_tracer.cc", + "env/fs_posix.cc", + "env/fs_remap.cc", + "env/io_posix.cc", + "env/mock_env.cc", + "env/unique_id_gen.cc", + "file/delete_scheduler.cc", + "file/file_prefetch_buffer.cc", + "file/file_util.cc", + "file/filename.cc", + "file/line_file_reader.cc", + "file/random_access_file_reader.cc", + "file/read_write_util.cc", + "file/readahead_raf.cc", + "file/sequence_file_reader.cc", + "file/sst_file_manager_impl.cc", + "file/writable_file_writer.cc", + "logging/auto_roll_logger.cc", + "logging/event_logger.cc", + "logging/log_buffer.cc", + "memory/arena.cc", + "memory/concurrent_arena.cc", + "memory/jemalloc_nodump_allocator.cc", + "memory/memkind_kmem_allocator.cc", + "memory/memory_allocator.cc", + "memtable/alloc_tracker.cc", + "memtable/hash_linklist_rep.cc", + "memtable/hash_skiplist_rep.cc", + "memtable/skiplistrep.cc", + "memtable/vectorrep.cc", + "memtable/write_buffer_manager.cc", + "monitoring/histogram.cc", + "monitoring/histogram_windowing.cc", + "monitoring/in_memory_stats_history.cc", + "monitoring/instrumented_mutex.cc", + "monitoring/iostats_context.cc", + "monitoring/perf_context.cc", + "monitoring/perf_level.cc", + "monitoring/persistent_stats_history.cc", + "monitoring/statistics.cc", + "monitoring/thread_status_impl.cc", + "monitoring/thread_status_updater.cc", + "monitoring/thread_status_updater_debug.cc", + "monitoring/thread_status_util.cc", + "monitoring/thread_status_util_debug.cc", + "options/cf_options.cc", + "options/configurable.cc", + "options/customizable.cc", + "options/db_options.cc", + "options/options.cc", + "options/options_helper.cc", + "options/options_parser.cc", + "port/port_posix.cc", + 
"port/stack_trace.cc", + "port/win/env_default.cc", + "port/win/env_win.cc", + "port/win/io_win.cc", + "port/win/port_win.cc", + "port/win/win_logger.cc", + "port/win/win_thread.cc", + "table/adaptive/adaptive_table_factory.cc", + "table/block_based/binary_search_index_reader.cc", + "table/block_based/block.cc", + "table/block_based/block_based_filter_block.cc", + "table/block_based/block_based_table_builder.cc", + "table/block_based/block_based_table_factory.cc", + "table/block_based/block_based_table_iterator.cc", + "table/block_based/block_based_table_reader.cc", + "table/block_based/block_builder.cc", + "table/block_based/block_prefetcher.cc", + "table/block_based/block_prefix_index.cc", + "table/block_based/data_block_footer.cc", + "table/block_based/data_block_hash_index.cc", + "table/block_based/filter_block_reader_common.cc", + "table/block_based/filter_policy.cc", + "table/block_based/flush_block_policy.cc", + "table/block_based/full_filter_block.cc", + "table/block_based/hash_index_reader.cc", + "table/block_based/index_builder.cc", + "table/block_based/index_reader_common.cc", + "table/block_based/parsed_full_filter_block.cc", + "table/block_based/partitioned_filter_block.cc", + "table/block_based/partitioned_index_iterator.cc", + "table/block_based/partitioned_index_reader.cc", + "table/block_based/reader_common.cc", + "table/block_based/uncompression_dict_reader.cc", + "table/block_fetcher.cc", + "table/cuckoo/cuckoo_table_builder.cc", + "table/cuckoo/cuckoo_table_factory.cc", + "table/cuckoo/cuckoo_table_reader.cc", + "table/format.cc", + "table/get_context.cc", + "table/iterator.cc", + "table/merging_iterator.cc", + "table/meta_blocks.cc", + "table/persistent_cache_helper.cc", + "table/plain/plain_table_bloom.cc", + "table/plain/plain_table_builder.cc", + "table/plain/plain_table_factory.cc", + "table/plain/plain_table_index.cc", + "table/plain/plain_table_key_coding.cc", + "table/plain/plain_table_reader.cc", + "table/sst_file_dumper.cc", + "table/sst_file_reader.cc", + "table/sst_file_writer.cc", + "table/table_factory.cc", + "table/table_properties.cc", + "table/two_level_iterator.cc", + "table/unique_id.cc", + "test_util/sync_point.cc", + "test_util/sync_point_impl.cc", + "test_util/transaction_test_util.cc", + "tools/dump/db_dump_tool.cc", + "tools/io_tracer_parser_tool.cc", + "tools/ldb_cmd.cc", + "tools/ldb_tool.cc", + "tools/sst_dump_tool.cc", + "trace_replay/block_cache_tracer.cc", + "trace_replay/io_tracer.cc", + "trace_replay/trace_record.cc", + "trace_replay/trace_record_handler.cc", + "trace_replay/trace_record_result.cc", + "trace_replay/trace_replay.cc", + "util/build_version.cc", + "util/coding.cc", + "util/compaction_job_stats_impl.cc", + "util/comparator.cc", + "util/compression_context_cache.cc", + "util/concurrent_task_limiter_impl.cc", + "util/crc32c.cc", + "util/crc32c_arm64.cc", + "util/dynamic_bloom.cc", + "util/file_checksum_helper.cc", + "util/hash.cc", + "util/murmurhash.cc", + "util/random.cc", + "util/rate_limiter.cc", + "util/regex.cc", + "util/ribbon_config.cc", + "util/slice.cc", + "util/status.cc", + "util/string_util.cc", + "util/thread_local.cc", + "util/threadpool_imp.cc", + "util/xxhash.cc", + "utilities/backupable/backupable_db.cc", + "utilities/blob_db/blob_compaction_filter.cc", + "utilities/blob_db/blob_db.cc", + "utilities/blob_db/blob_db_impl.cc", + "utilities/blob_db/blob_db_impl_filesnapshot.cc", + "utilities/blob_db/blob_dump_tool.cc", + "utilities/blob_db/blob_file.cc", + "utilities/cache_dump_load.cc", + 
"utilities/cache_dump_load_impl.cc", + "utilities/cassandra/cassandra_compaction_filter.cc", + "utilities/cassandra/format.cc", + "utilities/cassandra/merge_operator.cc", + "utilities/checkpoint/checkpoint_impl.cc", + "utilities/compaction_filters.cc", + "utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc", + "utilities/convenience/info_log_finder.cc", + "utilities/debug.cc", + "utilities/env_mirror.cc", + "utilities/env_timed.cc", + "utilities/fault_injection_env.cc", + "utilities/fault_injection_fs.cc", + "utilities/fault_injection_secondary_cache.cc", + "utilities/leveldb_options/leveldb_options.cc", + "utilities/memory/memory_util.cc", + "utilities/merge_operators.cc", + "utilities/merge_operators/bytesxor.cc", + "utilities/merge_operators/max.cc", + "utilities/merge_operators/put.cc", + "utilities/merge_operators/sortlist.cc", + "utilities/merge_operators/string_append/stringappend.cc", + "utilities/merge_operators/string_append/stringappend2.cc", + "utilities/merge_operators/uint64add.cc", + "utilities/object_registry.cc", + "utilities/option_change_migration/option_change_migration.cc", + "utilities/options/options_util.cc", + "utilities/persistent_cache/block_cache_tier.cc", + "utilities/persistent_cache/block_cache_tier_file.cc", + "utilities/persistent_cache/block_cache_tier_metadata.cc", + "utilities/persistent_cache/persistent_cache_tier.cc", + "utilities/persistent_cache/volatile_tier_impl.cc", + "utilities/simulator_cache/cache_simulator.cc", + "utilities/simulator_cache/sim_cache.cc", + "utilities/table_properties_collectors/compact_on_deletion_collector.cc", + "utilities/trace/file_trace_reader_writer.cc", + "utilities/trace/replayer_impl.cc", + "utilities/transactions/lock/lock_manager.cc", + "utilities/transactions/lock/point/point_lock_manager.cc", + "utilities/transactions/lock/point/point_lock_tracker.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc", + "utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc", + "utilities/transactions/lock/range/range_tree/lib/standalone_port.cc", + "utilities/transactions/lock/range/range_tree/lib/util/dbt.cc", + "utilities/transactions/lock/range/range_tree/lib/util/memarena.cc", + "utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc", + "utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc", + "utilities/transactions/optimistic_transaction.cc", + "utilities/transactions/optimistic_transaction_db_impl.cc", + "utilities/transactions/pessimistic_transaction.cc", + "utilities/transactions/pessimistic_transaction_db.cc", + "utilities/transactions/snapshot_checker.cc", + "utilities/transactions/transaction_base.cc", + "utilities/transactions/transaction_db_mutex_impl.cc", + "utilities/transactions/transaction_util.cc", + "utilities/transactions/write_prepared_txn.cc", + "utilities/transactions/write_prepared_txn_db.cc", + "utilities/transactions/write_unprepared_txn.cc", + 
"utilities/transactions/write_unprepared_txn_db.cc", + "utilities/ttl/db_ttl_impl.cc", + "utilities/wal_filter.cc", + "utilities/write_batch_with_index/write_batch_with_index.cc", + "utilities/write_batch_with_index/write_batch_with_index_internal.cc", + ], + auto_headers = AutoHeaders.RECURSIVE_GLOB, + arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, + compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + link_whole = True, + os_deps = ROCKSDB_OS_DEPS, + os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, + preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, + exported_deps = [], + exported_external_deps = ROCKSDB_EXTERNAL_DEPS, ) cpp_library( @@ -366,7 +803,7 @@ srcs = [ "db/db_test_util.cc", "table/mock_table.cc", - "test_util/fault_injection_test_env.cc", + "test_util/mock_time_env.cc", "test_util/testharness.cc", "test_util/testutil.cc", "tools/block_cache_analyzer/block_cache_trace_analyzer.cc", @@ -376,11 +813,15 @@ auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + link_whole = False, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - deps = [":rocksdb_lib"], - external_deps = ROCKSDB_EXTERNAL_DEPS, + exported_deps = [":rocksdb_lib"], + exported_external_deps = ROCKSDB_EXTERNAL_DEPS + [ + ("googletest", None, "gtest"), + ], ) cpp_library( @@ -389,16 +830,34 @@ "test_util/testutil.cc", "tools/block_cache_analyzer/block_cache_trace_analyzer.cc", "tools/db_bench_tool.cc", + "tools/simulated_hybrid_file_system.cc", "tools/trace_analyzer_tool.cc", ], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + link_whole = False, + os_deps = ROCKSDB_OS_DEPS, + os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, + preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, + exported_deps = [":rocksdb_lib"], + exported_external_deps = ROCKSDB_EXTERNAL_DEPS, +) + +cpp_library( + name = "rocksdb_cache_bench_tools_lib", + srcs = ["cache/cache_bench_tool.cc"], + auto_headers = AutoHeaders.RECURSIVE_GLOB, + arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, + compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + link_whole = False, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - deps = [":rocksdb_lib"], - external_deps = ROCKSDB_EXTERNAL_DEPS, + exported_deps = [":rocksdb_lib"], + exported_external_deps = ROCKSDB_EXTERNAL_DEPS, ) cpp_library( @@ -409,9 +868,13 @@ "db_stress_tool/db_stress_common.cc", "db_stress_tool/db_stress_driver.cc", "db_stress_tool/db_stress_gflags.cc", + "db_stress_tool/db_stress_listener.cc", "db_stress_tool/db_stress_shared_state.cc", + "db_stress_tool/db_stress_stat.cc", "db_stress_tool/db_stress_test_base.cc", "db_stress_tool/db_stress_tool.cc", + "db_stress_tool/expected_state.cc", + "db_stress_tool/multi_ops_txns_stress.cc", "db_stress_tool/no_batched_ops_stress.cc", "test_util/testutil.cc", "tools/block_cache_analyzer/block_cache_trace_analyzer.cc", @@ -420,24 +883,47 @@ auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, os_deps = 
ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - deps = [":rocksdb_lib"], - external_deps = ROCKSDB_EXTERNAL_DEPS, + exported_deps = ROCKSDB_LIB_DEPS, + exported_external_deps = ROCKSDB_EXTERNAL_DEPS, ) +cpp_binary( + name = "c_test_bin", + srcs = ["db/c_test.c"], + arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, + compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, + preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, + deps = [":rocksdb_test_lib"], +) if not is_opt_mode else None + +custom_unittest( + name = "c_test", + command = [ + native.package_name() + "/buckifier/rocks_test_runner.sh", + "$(location :{})".format("c_test_bin"), + ], + type = "simple", +) if not is_opt_mode else None + cpp_library( name = "env_basic_test_lib", srcs = ["env/env_basic_test.cc"], auto_headers = AutoHeaders.RECURSIVE_GLOB, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + link_whole = False, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - deps = [":rocksdb_test_lib"], - external_deps = ROCKSDB_EXTERNAL_DEPS, + exported_deps = [":rocksdb_test_lib"], + exported_external_deps = ROCKSDB_EXTERNAL_DEPS, ) # [test_name, test_src, test_type, extra_deps, extra_compiler_flags] @@ -445,21 +931,21 @@ [ "arena_test", "memory/arena_test.cc", - "serial", + "parallel", [], [], ], [ "auto_roll_logger_test", "logging/auto_roll_logger_test.cc", - "serial", + "parallel", [], [], ], [ "autovector_test", "util/autovector_test.cc", - "serial", + "parallel", [], [], ], @@ -471,233 +957,345 @@ [], ], [ + "blob_counting_iterator_test", + "db/blob/blob_counting_iterator_test.cc", + "parallel", + [], + [], + ], + [ "blob_db_test", "utilities/blob_db/blob_db_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "blob_file_addition_test", + "db/blob/blob_file_addition_test.cc", + "parallel", + [], + [], + ], + [ + "blob_file_builder_test", + "db/blob/blob_file_builder_test.cc", + "parallel", + [], + [], + ], + [ + "blob_file_cache_test", + "db/blob/blob_file_cache_test.cc", + "parallel", + [], + [], + ], + [ + "blob_file_garbage_test", + "db/blob/blob_file_garbage_test.cc", + "parallel", + [], + [], + ], + [ + "blob_file_reader_test", + "db/blob/blob_file_reader_test.cc", + "parallel", + [], + [], + ], + [ + "blob_garbage_meter_test", + "db/blob/blob_garbage_meter_test.cc", + "parallel", [], [], ], [ "block_based_filter_block_test", "table/block_based/block_based_filter_block_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "block_based_table_reader_test", + "table/block_based/block_based_table_reader_test.cc", + "parallel", [], [], ], [ "block_cache_trace_analyzer_test", "tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc", - "serial", + "parallel", [], [], ], [ "block_cache_tracer_test", "trace_replay/block_cache_tracer_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "block_fetcher_test", + "table/block_fetcher_test.cc", + "parallel", [], [], ], [ "block_test", "table/block_based/block_test.cc", - "serial", + "parallel", [], [], ], [ "bloom_test", "util/bloom_test.cc", - "serial", + "parallel", [], [], ], [ - "c_test", - "db/c_test.c", - "serial", + "cache_reservation_manager_test", + "cache/cache_reservation_manager_test.cc", + "parallel", [], [], ], [ 
"cache_simulator_test", "utilities/simulator_cache/cache_simulator_test.cc", - "serial", + "parallel", [], [], ], [ "cache_test", "cache/cache_test.cc", - "serial", + "parallel", [], [], ], [ "cassandra_format_test", "utilities/cassandra/cassandra_format_test.cc", - "serial", + "parallel", [], [], ], [ "cassandra_functional_test", "utilities/cassandra/cassandra_functional_test.cc", - "serial", + "parallel", [], [], ], [ "cassandra_row_merge_test", "utilities/cassandra/cassandra_row_merge_test.cc", - "serial", + "parallel", [], [], ], [ "cassandra_serialize_test", "utilities/cassandra/cassandra_serialize_test.cc", - "serial", + "parallel", [], [], ], [ "checkpoint_test", "utilities/checkpoint/checkpoint_test.cc", - "serial", + "parallel", [], [], ], [ "cleanable_test", "table/cleanable_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "clipping_iterator_test", + "db/compaction/clipping_iterator_test.cc", + "parallel", [], [], ], [ "coding_test", "util/coding_test.cc", - "serial", + "parallel", [], [], ], [ "column_family_test", "db/column_family_test.cc", - "serial", + "parallel", [], [], ], [ "compact_files_test", "db/compact_files_test.cc", - "serial", + "parallel", [], [], ], [ "compact_on_deletion_collector_test", "utilities/table_properties_collectors/compact_on_deletion_collector_test.cc", - "serial", + "parallel", [], [], ], [ "compaction_iterator_test", "db/compaction/compaction_iterator_test.cc", - "serial", + "parallel", [], [], ], [ "compaction_job_stats_test", "db/compaction/compaction_job_stats_test.cc", - "serial", + "parallel", [], [], ], [ "compaction_job_test", "db/compaction/compaction_job_test.cc", - "serial", + "parallel", [], [], ], [ "compaction_picker_test", "db/compaction/compaction_picker_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "compaction_service_test", + "db/compaction/compaction_service_test.cc", + "parallel", [], [], ], [ "comparator_db_test", "db/comparator_db_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "configurable_test", + "options/configurable_test.cc", + "parallel", [], [], ], [ "corruption_test", "db/corruption_test.cc", - "serial", + "parallel", [], [], ], [ "crc32c_test", "util/crc32c_test.cc", - "serial", + "parallel", [], [], ], [ "cuckoo_table_builder_test", "table/cuckoo/cuckoo_table_builder_test.cc", - "serial", + "parallel", [], [], ], [ "cuckoo_table_db_test", "db/cuckoo_table_db_test.cc", - "serial", + "parallel", [], [], ], [ "cuckoo_table_reader_test", "table/cuckoo/cuckoo_table_reader_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "customizable_test", + "options/customizable_test.cc", + "parallel", [], [], ], [ "data_block_hash_index_test", "table/block_based/data_block_hash_index_test.cc", - "serial", + "parallel", [], [], ], [ "db_basic_test", "db/db_basic_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "db_blob_basic_test", + "db/blob/db_blob_basic_test.cc", + "parallel", + [], + [], + ], + [ + "db_blob_compaction_test", + "db/blob/db_blob_compaction_test.cc", + "parallel", + [], + [], + ], + [ + "db_blob_corruption_test", + "db/blob/db_blob_corruption_test.cc", + "parallel", [], [], ], [ "db_blob_index_test", - "db/db_blob_index_test.cc", - "serial", + "db/blob/db_blob_index_test.cc", + "parallel", [], [], ], [ "db_block_cache_test", "db/db_block_cache_test.cc", - "serial", + "parallel", [], [], ], @@ -725,77 +1323,91 @@ [ "db_dynamic_level_test", "db/db_dynamic_level_test.cc", - "serial", + "parallel", [], [], ], [ "db_encryption_test", "db/db_encryption_test.cc", - 
"serial", + "parallel", [], [], ], [ "db_flush_test", "db/db_flush_test.cc", - "serial", + "parallel", [], [], ], [ "db_inplace_update_test", "db/db_inplace_update_test.cc", - "serial", + "parallel", [], [], ], [ "db_io_failure_test", "db/db_io_failure_test.cc", - "serial", + "parallel", [], [], ], [ "db_iter_stress_test", "db/db_iter_stress_test.cc", - "serial", + "parallel", [], [], ], [ "db_iter_test", "db/db_iter_test.cc", - "serial", + "parallel", [], [], ], [ "db_iterator_test", "db/db_iterator_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "db_kv_checksum_test", + "db/db_kv_checksum_test.cc", + "parallel", [], [], ], [ "db_log_iter_test", "db/db_log_iter_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "db_logical_block_size_cache_test", + "db/db_logical_block_size_cache_test.cc", + "parallel", [], [], ], [ "db_memtable_test", "db/db_memtable_test.cc", - "serial", + "parallel", [], [], ], [ "db_merge_operand_test", "db/db_merge_operand_test.cc", - "serial", + "parallel", [], [], ], @@ -809,28 +1421,28 @@ [ "db_options_test", "db/db_options_test.cc", - "serial", + "parallel", [], [], ], [ "db_properties_test", "db/db_properties_test.cc", - "serial", + "parallel", [], [], ], [ "db_range_del_test", "db/db_range_del_test.cc", - "serial", + "parallel", [], [], ], [ "db_secondary_test", - "db/db_impl/db_secondary_test.cc", - "serial", + "db/db_secondary_test.cc", + "parallel", [], [], ], @@ -844,21 +1456,21 @@ [ "db_statistics_test", "db/db_statistics_test.cc", - "serial", + "parallel", [], [], ], [ "db_table_properties_test", "db/db_table_properties_test.cc", - "serial", + "parallel", [], [], ], [ "db_tailing_iter_test", "db/db_tailing_iter_test.cc", - "serial", + "parallel", [], [], ], @@ -872,7 +1484,7 @@ [ "db_test2", "db/db_test2.cc", - "serial", + "parallel", [], [], ], @@ -891,30 +1503,51 @@ [], ], [ + "db_with_timestamp_basic_test", + "db/db_with_timestamp_basic_test.cc", + "parallel", + [], + [], + ], + [ + "db_with_timestamp_compaction_test", + "db/db_with_timestamp_compaction_test.cc", + "parallel", + [], + [], + ], + [ + "db_write_buffer_manager_test", + "db/db_write_buffer_manager_test.cc", + "parallel", + [], + [], + ], + [ "db_write_test", "db/db_write_test.cc", - "serial", + "parallel", [], [], ], [ "dbformat_test", "db/dbformat_test.cc", - "serial", + "parallel", [], [], ], [ "defer_test", "util/defer_test.cc", - "serial", + "parallel", [], [], ], [ "delete_scheduler_test", "file/delete_scheduler_test.cc", - "serial", + "parallel", [], [], ], @@ -928,21 +1561,21 @@ [ "dynamic_bloom_test", "util/dynamic_bloom_test.cc", - "serial", + "parallel", [], [], ], [ "env_basic_test", "env/env_basic_test.cc", - "serial", + "parallel", [], [], ], [ "env_logger_test", "logging/env_logger_test.cc", - "serial", + "parallel", [], [], ], @@ -956,28 +1589,28 @@ [ "env_timed_test", "utilities/env_timed_test.cc", - "serial", + "parallel", [], [], ], [ - "error_handler_test", - "db/error_handler_test.cc", - "serial", + "error_handler_fs_test", + "db/error_handler_fs_test.cc", + "parallel", [], [], ], [ "event_logger_test", "logging/event_logger_test.cc", - "serial", + "parallel", [], [], ], [ "external_sst_file_basic_test", "db/external_sst_file_basic_test.cc", - "serial", + "parallel", [], [], ], @@ -998,7 +1631,7 @@ [ "file_indexer_test", "db/file_indexer_test.cc", - "serial", + "parallel", [], [], ], @@ -1012,56 +1645,56 @@ [ "filelock_test", "util/filelock_test.cc", - "serial", + "parallel", [], [], ], [ "filename_test", "db/filename_test.cc", - "serial", + 
"parallel", [], [], ], [ "flush_job_test", "db/flush_job_test.cc", - "serial", + "parallel", [], [], ], [ "full_filter_block_test", "table/block_based/full_filter_block_test.cc", - "serial", + "parallel", [], [], ], [ "hash_table_test", "utilities/persistent_cache/hash_table_test.cc", - "serial", + "parallel", [], [], ], [ "hash_test", "util/hash_test.cc", - "serial", + "parallel", [], [], ], [ "heap_test", "util/heap_test.cc", - "serial", + "parallel", [], [], ], [ "histogram_test", "monitoring/histogram_test.cc", - "serial", + "parallel", [], [], ], @@ -1080,37 +1713,58 @@ [], ], [ + "io_posix_test", + "env/io_posix_test.cc", + "parallel", + [], + [], + ], + [ + "io_tracer_parser_test", + "tools/io_tracer_parser_test.cc", + "parallel", + [], + [], + ], + [ + "io_tracer_test", + "trace_replay/io_tracer_test.cc", + "parallel", + [], + [], + ], + [ "iostats_context_test", "monitoring/iostats_context_test.cc", - "serial", + "parallel", [], [], ], [ "ldb_cmd_test", "tools/ldb_cmd_test.cc", - "serial", + "parallel", [], [], ], [ "listener_test", "db/listener_test.cc", - "serial", + "parallel", [], [], ], [ "log_test", "db/log_test.cc", - "serial", + "parallel", [], [], ], [ "lru_cache_test", "cache/lru_cache_test.cc", - "serial", + "parallel", [], [], ], @@ -1122,114 +1776,128 @@ [], ], [ + "memory_allocator_test", + "memory/memory_allocator_test.cc", + "parallel", + [], + [], + ], + [ "memory_test", "utilities/memory/memory_test.cc", - "serial", + "parallel", [], [], ], [ "memtable_list_test", "db/memtable_list_test.cc", - "serial", + "parallel", [], [], ], [ "merge_helper_test", "db/merge_helper_test.cc", - "serial", + "parallel", [], [], ], [ "merge_test", "db/merge_test.cc", - "serial", + "parallel", [], [], ], [ "merger_test", "table/merger_test.cc", - "serial", + "parallel", [], [], ], [ "mock_env_test", "env/mock_env_test.cc", - "serial", + "parallel", [], [], ], [ "object_registry_test", "utilities/object_registry_test.cc", - "serial", + "parallel", [], [], ], [ "obsolete_files_test", "db/obsolete_files_test.cc", - "serial", + "parallel", [], [], ], [ "optimistic_transaction_test", "utilities/transactions/optimistic_transaction_test.cc", - "serial", + "parallel", [], [], ], [ "option_change_migration_test", "utilities/option_change_migration/option_change_migration_test.cc", - "serial", + "parallel", [], [], ], [ "options_file_test", "db/options_file_test.cc", - "serial", + "parallel", [], [], ], [ "options_settable_test", "options/options_settable_test.cc", - "serial", + "parallel", [], [], ], [ "options_test", "options/options_test.cc", - "serial", + "parallel", [], [], ], [ "options_util_test", "utilities/options/options_util_test.cc", - "serial", + "parallel", [], [], ], [ "partitioned_filter_block_test", "table/block_based/partitioned_filter_block_test.cc", - "serial", + "parallel", [], [], ], [ "perf_context_test", "db/perf_context_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "periodic_work_scheduler_test", + "db/periodic_work_scheduler_test.cc", + "parallel", [], [], ], @@ -1243,133 +1911,168 @@ [ "plain_table_db_test", "db/plain_table_db_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "point_lock_manager_test", + "utilities/transactions/lock/point/point_lock_manager_test.cc", + "parallel", + [], + [], + ], + [ + "prefetch_test", + "file/prefetch_test.cc", + "parallel", [], [], ], [ "prefix_test", "db/prefix_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "random_access_file_reader_test", + "file/random_access_file_reader_test.cc", + 
"parallel", [], [], ], [ "random_test", "util/random_test.cc", - "serial", + "parallel", [], [], ], [ "range_del_aggregator_test", "db/range_del_aggregator_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "range_locking_test", + "utilities/transactions/lock/range/range_locking_test.cc", + "parallel", [], [], ], [ "range_tombstone_fragmenter_test", "db/range_tombstone_fragmenter_test.cc", - "serial", + "parallel", [], [], ], [ "rate_limiter_test", "util/rate_limiter_test.cc", - "serial", + "parallel", [], [], ], [ "reduce_levels_test", "tools/reduce_levels_test.cc", - "serial", + "parallel", [], [], ], [ "repair_test", "db/repair_test.cc", - "serial", + "parallel", [], [], ], [ "repeatable_thread_test", "util/repeatable_thread_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "ribbon_test", + "util/ribbon_test.cc", + "parallel", [], [], ], [ "sim_cache_test", "utilities/simulator_cache/sim_cache_test.cc", - "serial", + "parallel", [], [], ], [ "skiplist_test", "memtable/skiplist_test.cc", - "serial", + "parallel", [], [], ], [ "slice_test", "util/slice_test.cc", - "serial", + "parallel", [], [], ], [ "slice_transform_test", "util/slice_transform_test.cc", - "serial", + "parallel", [], [], ], [ "sst_dump_test", "tools/sst_dump_test.cc", - "serial", + "parallel", [], [], ], [ "sst_file_reader_test", "table/sst_file_reader_test.cc", - "serial", + "parallel", [], [], ], [ "statistics_test", "monitoring/statistics_test.cc", - "serial", + "parallel", [], [], ], [ "stats_history_test", "monitoring/stats_history_test.cc", - "serial", + "parallel", [], [], ], [ "stringappend_test", "utilities/merge_operators/string_append/stringappend_test.cc", - "serial", + "parallel", [], [], ], [ "table_properties_collector_test", "db/table_properties_collector_test.cc", - "serial", + "parallel", [], [], ], @@ -1381,30 +2084,44 @@ [], ], [ + "testutil_test", + "test_util/testutil_test.cc", + "parallel", + [], + [], + ], + [ "thread_list_test", "util/thread_list_test.cc", - "serial", + "parallel", [], [], ], [ "thread_local_test", "util/thread_local_test.cc", - "serial", + "parallel", [], [], ], [ "timer_queue_test", "util/timer_queue_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "timer_test", + "util/timer_test.cc", + "parallel", [], [], ], [ "trace_analyzer_test", "tools/trace_analyzer_test.cc", - "serial", + "parallel", [], [], ], @@ -1418,77 +2135,84 @@ [ "ttl_test", "utilities/ttl/ttl_test.cc", - "serial", + "parallel", [], [], ], [ "util_merge_operators_test", "utilities/util_merge_operators_test.cc", - "serial", + "parallel", [], [], ], [ "version_builder_test", "db/version_builder_test.cc", - "serial", + "parallel", [], [], ], [ "version_edit_test", "db/version_edit_test.cc", - "serial", + "parallel", [], [], ], [ "version_set_test", "db/version_set_test.cc", - "serial", + "parallel", [], [], ], [ "wal_manager_test", "db/wal_manager_test.cc", - "serial", + "parallel", + [], + [], + ], + [ + "work_queue_test", + "util/work_queue_test.cc", + "parallel", [], [], ], [ "write_batch_test", "db/write_batch_test.cc", - "serial", + "parallel", [], [], ], [ "write_batch_with_index_test", "utilities/write_batch_with_index/write_batch_with_index_test.cc", - "serial", + "parallel", [], [], ], [ "write_buffer_manager_test", "memtable/write_buffer_manager_test.cc", - "serial", + "parallel", [], [], ], [ "write_callback_test", "db/write_callback_test.cc", - "serial", + "parallel", [], [], ], [ "write_controller_test", "db/write_controller_test.cc", - "serial", + "parallel", [], [], ], @@ 
-1512,18 +2236,18 @@ # Do not build the tests in opt mode, since SyncPoint and other test code # will not be included. [ - test_binary( - extra_compiler_flags = extra_compiler_flags, - extra_deps = extra_deps, - parallelism = parallelism, - rocksdb_arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, - rocksdb_compiler_flags = ROCKSDB_COMPILER_FLAGS, - rocksdb_external_deps = ROCKSDB_EXTERNAL_DEPS, - rocksdb_os_deps = ROCKSDB_OS_DEPS, - rocksdb_os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, - rocksdb_preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - test_cc = test_cc, - test_name = test_name, + cpp_unittest( + name = test_name, + srcs = [test_cc], + arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, + compiler_flags = ROCKSDB_COMPILER_FLAGS + extra_compiler_flags, + include_paths = ROCKSDB_INCLUDE_PATHS, + os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, + preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, + deps = [":rocksdb_test_lib"] + extra_deps, + external_deps = ROCKSDB_EXTERNAL_DEPS + [ + ("googletest", None, "gtest"), + ], ) for test_name, test_cc, parallelism, extra_deps, extra_compiler_flags in ROCKS_TESTS if not is_opt_mode diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/USERS.md mariadb-10.11.13/storage/rocksdb/rocksdb/USERS.md --- mariadb-10.11.11/storage/rocksdb/rocksdb/USERS.md 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/USERS.md 2025-05-19 16:14:27.000000000 +0000 @@ -26,6 +26,9 @@ ## Yahoo Yahoo is using RocksDB as a storage engine for their biggest distributed data store Sherpa. Learn more about it here: http://yahooeng.tumblr.com/post/120730204806/sherpa-scales-new-heights +## Baidu +[Apache Doris](http://doris.apache.org/master/en/) is a MPP analytical database engine released by Baidu. It [uses RocksDB](http://doris.apache.org/master/en/administrator-guide/operation/tablet-meta-tool.html) to manage its tablet's metadata. + ## CockroachDB CockroachDB is an open-source geo-replicated transactional database. They are using RocksDB as their storage engine. Check out their github: https://github.com/cockroachdb/cockroach @@ -44,7 +47,7 @@ Turn is using RocksDB as a storage layer for their key/value store, serving at peak 2.4MM QPS out of different datacenters. Check out our RocksDB Protobuf merge operator at: https://github.com/vladb38/rocksdb_protobuf -## Santanader UK/Cloudera Profession Services +## Santander UK/Cloudera Profession Services Check out their blog post: http://blog.cloudera.com/blog/2015/08/inside-santanders-near-real-time-data-ingest-architecture/ ## Airbnb @@ -67,7 +70,7 @@ [VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed. ## quasardb -[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark. +[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark. quasardb uses a heavily tuned RocksDB as its persistence layer. ## Netflix @@ -86,7 +89,7 @@ [Uber](http://eng.uber.com/cherami/) uses RocksDB as a durable and scalable task queue. ## 360 Pika -[360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a nosql compatible with redis. With the huge amount of data stored, redis may suffer for a capacity bottleneck, and pika was born for solving it. 
It has widely been widely used in many company
+[360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a nosql compatible with redis. With the huge amount of data stored, redis may suffer for a capacity bottleneck, and pika was born for solving it. It has widely been used in many companies.

 ## LzLabs
 LzLabs is using RocksDB as a storage engine in their multi-database distributed framework to store application configuration and user data.
@@ -96,13 +99,28 @@
 ## IOTA Foundation
 [IOTA Foundation](https://www.iota.org/) is using RocksDB in the [IOTA Reference Implementation (IRI)](https://github.com/iotaledger/iri) to store the local state of the Tangle. The Tangle is the first open-source distributed ledger powering the future of the Internet of Things.
- 
+
 ## Avrio Project
 [Avrio Project](http://avrio-project.github.io/avrio.network/) is using RocksDB in [Avrio ](https://github.com/avrio-project/avrio) to store blocks, account balances and data and other blockchain-releated data. Avrio is a multiblockchain decentralized cryptocurrency empowering monetary transactions.
- 
+
 ## Crux
 [Crux](https://github.com/juxt/crux) is a document database that uses RocksDB for local [EAV](https://en.wikipedia.org/wiki/Entity%E2%80%93attribute%E2%80%93value_model) index storage to enable point-in-time bitemporal Datalog queries. The "unbundled" architecture uses Kafka to provide horizontal scalability.

 ## Nebula Graph
 - [Nebula Graph](https://github.com/vesoft-inc/nebula) is a distributed, scalable, lightning-fast, open source graph database capable of hosting super large scale graphs with dozens of billions of vertices (nodes) and trillions of edges, with milliseconds of latency.
+
+## YugabyteDB
+[YugabyteDB](https://www.yugabyte.com/) is an open source, high performance, distributed SQL database that uses RocksDB as its storage layer. For more information, please see https://github.com/yugabyte/yugabyte-db/.
+
+## ArangoDB
+[ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its storage engine.
+
+## Milvus
+[Milvus](https://milvus.io/) is an open source vector database for unstructured data. It uses RocksDB not only as one of the supported kv storage engines, but also as a message queue.
+
+## Kafka
+[Kafka](https://kafka.apache.org/) is an open-source distributed event streaming platform, it uses RocksDB to store state in Kafka Streams: https://www.confluent.io/blog/how-to-tune-rocksdb-kafka-streams-state-stores-performance/.
+
+## Others
+More databases using RocksDB can be found at [dbdb.io](https://dbdb.io/browse?embeds=rocksdb).
+
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/WINDOWS_PORT.md mariadb-10.11.13/storage/rocksdb/rocksdb/WINDOWS_PORT.md
--- mariadb-10.11.11/storage/rocksdb/rocksdb/WINDOWS_PORT.md 2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/WINDOWS_PORT.md 2025-05-19 16:14:27.000000000 +0000
@@ -24,7 +24,7 @@
 * make all unit test pass both in debug and release builds.
 * Note: latest introduction of SyncPoint seems to disable running db_test in Release.
 * make performance on par with published benchmarks accounting for HW differences
-* we would like to keep the port code inline with the master branch with no forking
+* we would like to keep the port code inline with the main branch with no forking

 ## Build system
 We have chosen CMake as a widely accepted build system to build the Windows port. It is very fast and convenient.
@@ -66,7 +66,7 @@
 Even though Windows provides its own efficient thread-pool implementation we chose to replicate posix logic using `std::thread` primitives. This allows anyone to quickly detect any changes within the posix source code and replicate them within windows env. This has proven to work very well. At the same time for anyone who wishes to replace the built-in thread-pool can do so using RocksDB stackable environments.

 For disk access we implemented all of the functionality present within the posix_env which includes memory mapped files, random access, rate-limiter support etc.
-The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It’s not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows to perform writes and reads in cases when un-buffered access does not make sense such as WAL and MANIFEST.
+The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It's not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows to perform writes and reads in cases when un-buffered access does not make sense such as WAL and MANIFEST.

 We have replaced `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure so we can atomically seek to the position of the disk operation but still perform the operation synchronously. Thus we able to emulate that functionality of `pread/pwrite` reasonably well. The only difference is that the file pointer is not returned to its original position but that hardly matters given the random nature of access.
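To make the `OVERLAPPED` technique described above concrete, here is a minimal, hypothetical sketch of a positional read on Windows. It is an illustration only, not code from the port; `PositionalRead` and its parameters are invented names:

    #include <windows.h>
    #include <cstdint>

    // pread(2)-style read: `count` bytes at absolute `offset`.
    // Filling the OVERLAPPED offset fields makes the call positional,
    // so no separate seek (and no seek/read race) is needed; on a
    // handle opened without FILE_FLAG_OVERLAPPED, ReadFile still
    // completes synchronously. As noted above, the file pointer is
    // advanced as a side effect, unlike POSIX pread.
    static bool PositionalRead(HANDLE file, void* buf, DWORD count,
                               uint64_t offset, DWORD* bytes_read) {
      OVERLAPPED ov = {};
      ov.Offset = static_cast<DWORD>(offset & 0xFFFFFFFFu);
      ov.OffsetHigh = static_cast<DWORD>(offset >> 32);
      return ReadFile(file, buf, count, bytes_read, &ov) != FALSE;
    }

The write side is symmetric with `WriteFile`, which is why `pread`/`pwrite` call sites can be treated uniformly.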
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/appveyor.yml mariadb-10.11.13/storage/rocksdb/rocksdb/appveyor.yml --- mariadb-10.11.11/storage/rocksdb/rocksdb/appveyor.yml 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/appveyor.yml 2025-05-19 16:14:27.000000000 +0000 @@ -1,6 +1,6 @@ version: 1.0.{build} -image: Visual Studio 2017 +image: Visual Studio 2019 environment: JAVA_HOME: C:\Program Files\Java\jdk1.8.0 @@ -21,9 +21,6 @@ - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 CMAKE_GENERATOR: Visual Studio 14 Win64 DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\IDE\devenv.com - - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - CMAKE_GENERATOR: Visual Studio 15 Win64 - DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\devenv.com install: - md %THIRDPARTY_HOME% @@ -34,7 +31,8 @@ - cd snappy-1.1.7 - mkdir build - cd build - - cmake -G "%CMAKE_GENERATOR%" .. + - if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%") + - cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT% - msbuild Snappy.sln /p:Configuration=Debug /p:Platform=x64 - msbuild Snappy.sln /p:Configuration=Release /p:Platform=x64 - echo "Building LZ4 dependency..." @@ -57,7 +55,8 @@ before_build: - md %APPVEYOR_BUILD_FOLDER%\build - cd %APPVEYOR_BUILD_FOLDER%\build - - cmake -G "%CMAKE_GENERATOR%" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DLZ4=1 -DZSTD=1 -DXPRESS=1 -DJNI=1 .. + - if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%") + - cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT% %CMAKE_OPT% -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DLZ4=1 -DZSTD=1 -DXPRESS=1 -DJNI=1 -DWITH_ALL_TESTS=0 - cd .. build: @@ -68,7 +67,7 @@ test: test_script: - - ps: build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test2,db_test,env_basic_test,env_test,db_merge_operand_test -Concurrency 8 + - ps: build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,env_basic_test -Concurrency 8 on_failure: - cmd: 7z a build-failed.zip %APPVEYOR_BUILD_FOLDER%\build\ && appveyor PushArtifact build-failed.zip diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/buckify_rocksdb.py mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/buckify_rocksdb.py --- mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/buckify_rocksdb.py 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/buckify_rocksdb.py 2025-05-19 16:14:27.000000000 +0000 @@ -20,14 +20,14 @@ # User can pass extra dependencies as a JSON object via command line, and this # script can include these dependencies in the generate TARGETS file. # Usage: -# $python buckifier/buckify_rocksdb.py +# $python3 buckifier/buckify_rocksdb.py # (This generates a TARGET file without user-specified dependency for unit # tests.) 
-# $python buckifier/buckify_rocksdb.py \ -# '{"fake": { \ -# "extra_deps": [":test_dep", "//fakes/module:mock1"], \ -# "extra_compiler_flags": ["-DROCKSDB_LITE", "-Os"], \ -# } \ +# $python3 buckifier/buckify_rocksdb.py \ +# '{"fake": { +# "extra_deps": [":test_dep", "//fakes/module:mock1"], +# "extra_compiler_flags": ["-DROCKSDB_LITE", "-Os"] +# } # }' # (Generated TARGETS file has test_dep and mock1 as dependencies for RocksDB # unit tests, and will use the extra_compiler_flags to compile the unit test @@ -48,8 +48,8 @@ if '=' in line: current_src = line.split('=')[0].strip() src_files[current_src] = [] - elif '.cc' in line: - src_path = line.split('.cc')[0].strip() + '.cc' + elif '.c' in line: + src_path = line.split('\\')[0].strip() src_files[current_src].append(src_path) return src_files @@ -69,45 +69,28 @@ return cc_files -# Get tests from Makefile -def get_tests(repo_path): +# Get non_parallel tests from Makefile +def get_non_parallel_tests(repo_path): Makefile = repo_path + "/Makefile" - # Dictionary TEST_NAME => IS_PARALLEL - tests = {} + s = set({}) - found_tests = False + found_non_parallel_tests = False for line in open(Makefile): line = line.strip() - if line.startswith("TESTS ="): - found_tests = True - elif found_tests: + if line.startswith("NON_PARALLEL_TEST ="): + found_non_parallel_tests = True + elif found_non_parallel_tests: if line.endswith("\\"): # remove the trailing \ line = line[:-1] line = line.strip() - tests[line] = False + s.add(line) else: - # we consumed all the tests + # we consumed all the non_parallel tests break - found_parallel_tests = False - for line in open(Makefile): - line = line.strip() - if line.startswith("PARALLEL_TEST ="): - found_parallel_tests = True - elif found_parallel_tests: - if line.endswith("\\"): - # remove the trailing \ - line = line[:-1] - line = line.strip() - tests[line] = True - else: - # we consumed all the parallel tests - break - - return tests - + return s # Parse extra dependencies passed by user from command line def get_dependencies(): @@ -140,18 +123,38 @@ src_mk = parse_src_mk(repo_path) # get all .cc files cc_files = get_cc_files(repo_path) - # get tests from Makefile - tests = get_tests(repo_path) + # get non_parallel tests from Makefile + non_parallel_tests = get_non_parallel_tests(repo_path) - if src_mk is None or cc_files is None or tests is None: + if src_mk is None or cc_files is None or non_parallel_tests is None: return False - TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path) + extra_argv = "" + if len(sys.argv) >= 2: + # Heuristically quote and canonicalize whitespace for inclusion + # in how the file was generated. 
+ extra_argv = " '{0}'".format(" ".join(sys.argv[1].split())) + + TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path, extra_argv) + # rocksdb_lib TARGETS.add_library( "rocksdb_lib", src_mk["LIB_SOURCES"] + + # always add range_tree, it's only excluded on ppc64, which we don't use internally + src_mk["RANGE_TREE_SOURCES"] + src_mk["TOOL_LIB_SOURCES"]) + # rocksdb_whole_archive_lib + TARGETS.add_library( + "rocksdb_whole_archive_lib", + src_mk["LIB_SOURCES"] + + # always add range_tree, it's only excluded on ppc64, which we don't use internally + src_mk["RANGE_TREE_SOURCES"] + + src_mk["TOOL_LIB_SOURCES"], + deps=None, + headers=None, + extra_external_deps="", + link_whole=True) # rocksdb_test_lib TARGETS.add_library( "rocksdb_test_lib", @@ -159,7 +162,10 @@ src_mk.get("TEST_LIB_SOURCES", []) + src_mk.get("EXP_LIB_SOURCES", []) + src_mk.get("ANALYZER_LIB_SOURCES", []), - [":rocksdb_lib"]) + [":rocksdb_lib"], + extra_external_deps=""" + [ + ("googletest", None, "gtest"), + ]""") # rocksdb_tools_lib TARGETS.add_library( "rocksdb_tools_lib", @@ -167,41 +173,56 @@ src_mk.get("ANALYZER_LIB_SOURCES", []) + ["test_util/testutil.cc"], [":rocksdb_lib"]) - # rocksdb_stress_lib + # rocksdb_cache_bench_tools_lib TARGETS.add_library( + "rocksdb_cache_bench_tools_lib", + src_mk.get("CACHE_BENCH_LIB_SOURCES", []), + [":rocksdb_lib"]) + # rocksdb_stress_lib + TARGETS.add_rocksdb_library( "rocksdb_stress_lib", src_mk.get("ANALYZER_LIB_SOURCES", []) + src_mk.get('STRESS_LIB_SOURCES', []) - + ["test_util/testutil.cc"], - [":rocksdb_lib"]) + + ["test_util/testutil.cc"]) + + print("Extra dependencies:\n{0}".format(json.dumps(deps_map))) + + # Dictionary test executable name -> relative source file path + test_source_map = {} + print(src_mk) + + # c_test.c is added through TARGETS.add_c_test(). If there + # are more than one .c test file, we need to extend + # TARGETS.add_c_test() to include other C tests too. 
+ for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []): + if test_src != 'db/c_test.c': + print("Don't know how to deal with " + test_src) + return False + TARGETS.add_c_test() + + for test_src in src_mk.get("TEST_MAIN_SOURCES", []): + test = test_src.split('.c')[0].strip().split('/')[-1].strip() + test_source_map[test] = test_src + print("" + test + " " + test_src) - print("Extra dependencies:\n{0}".format(str(deps_map))) - # test for every test we found in the Makefile for target_alias, deps in deps_map.items(): - for test in sorted(tests): - match_src = [src for src in cc_files if ("/%s.c" % test) in src] - if len(match_src) == 0: - print(ColorString.warning("Cannot find .cc file for %s" % test)) - continue - elif len(match_src) > 1: - print(ColorString.warning("Found more than one .cc for %s" % test)) - print(match_src) + for test, test_src in sorted(test_source_map.items()): + if len(test) == 0: + print(ColorString.warning("Failed to get test name for %s" % test_src)) continue - assert(len(match_src) == 1) - is_parallel = tests[test] test_target_name = \ test if not target_alias else test + "_" + target_alias TARGETS.register_test( test_target_name, - match_src[0], - is_parallel, - deps['extra_deps'], - deps['extra_compiler_flags']) + test_src, + test not in non_parallel_tests, + json.dumps(deps['extra_deps']), + json.dumps(deps['extra_compiler_flags'])) if test in _EXPORTED_TEST_LIBS: test_library = "%s_lib" % test_target_name - TARGETS.add_library(test_library, match_src, [":rocksdb_test_lib"]) + TARGETS.add_library(test_library, [test_src], [":rocksdb_test_lib"]) TARGETS.flush_tests() print(ColorString.info("Generated TARGETS Summary:")) @@ -220,6 +241,7 @@ return rocksdb_path + def exit_with_error(msg): print(ColorString.error(msg)) sys.exit(1) diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/check_buck_targets.sh mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/check_buck_targets.sh --- mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/check_buck_targets.sh 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/check_buck_targets.sh 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# If clang_format_diff.py command is not specfied, we assume we are able to +# access directly without any path. + +TGT_DIFF=`git diff TARGETS | head -n 1` + +if [ ! -z "$TGT_DIFF" ] +then + echo "TARGETS file has uncommitted changes. Skip this check." + exit 0 +fi + +echo Backup original TARGETS file. + +cp TARGETS TARGETS.bkp + +${PYTHON:-python3} buckifier/buckify_rocksdb.py + +TGT_DIFF=`git diff TARGETS | head -n 1` + +if [ -z "$TGT_DIFF" ] +then + mv TARGETS.bkp TARGETS + exit 0 +else + echo "Please run '${PYTHON:-python3} buckifier/buckify_rocksdb.py' to update TARGETS file." + echo "Do not manually update TARGETS file." 
+ ${PYTHON:-python3} --version + mv TARGETS.bkp TARGETS + exit 1 +fi diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/targets_builder.py mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/targets_builder.py --- mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/targets_builder.py 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/targets_builder.py 2025-05-19 16:14:27.000000000 +0000 @@ -25,10 +25,12 @@ class TARGETSBuilder(object): - def __init__(self, path): + def __init__(self, path, extra_argv): self.path = path - self.targets_file = open(path, 'w') - self.targets_file.write(targets_cfg.rocksdb_target_header) + self.targets_file = open(path, 'wb') + header = targets_cfg.rocksdb_target_header_template.format( + extra_argv=extra_argv) + self.targets_file.write(header.encode("utf-8")) self.total_lib = 0 self.total_bin = 0 self.total_test = 0 @@ -37,26 +39,68 @@ def __del__(self): self.targets_file.close() - def add_library(self, name, srcs, deps=None, headers=None): + def add_library(self, name, srcs, deps=None, headers=None, + extra_external_deps="", link_whole=False): headers_attr_prefix = "" if headers is None: headers_attr_prefix = "auto_" headers = "AutoHeaders.RECURSIVE_GLOB" + else: + headers = "[" + pretty_list(headers) + "]" self.targets_file.write(targets_cfg.library_template.format( name=name, srcs=pretty_list(srcs), headers_attr_prefix=headers_attr_prefix, headers=headers, - deps=pretty_list(deps))) + deps=pretty_list(deps), + extra_external_deps=extra_external_deps, + link_whole=link_whole).encode("utf-8")) + self.total_lib = self.total_lib + 1 + + def add_rocksdb_library(self, name, srcs, headers=None): + headers_attr_prefix = "" + if headers is None: + headers_attr_prefix = "auto_" + headers = "AutoHeaders.RECURSIVE_GLOB" + else: + headers = "[" + pretty_list(headers) + "]" + self.targets_file.write(targets_cfg.rocksdb_library_template.format( + name=name, + srcs=pretty_list(srcs), + headers_attr_prefix=headers_attr_prefix, + headers=headers).encode("utf-8")) self.total_lib = self.total_lib + 1 def add_binary(self, name, srcs, deps=None): - self.targets_file.write(targets_cfg.binary_template % ( - name, - pretty_list(srcs), - pretty_list(deps))) + self.targets_file.write(targets_cfg.binary_template.format( + name=name, + srcs=pretty_list(srcs), + deps=pretty_list(deps)).encode("utf-8")) self.total_bin = self.total_bin + 1 + def add_c_test(self): + self.targets_file.write(b""" +cpp_binary( + name = "c_test_bin", + srcs = ["db/c_test.c"], + arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, + compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, + preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, + deps = [":rocksdb_test_lib"], +) if not is_opt_mode else None + +custom_unittest( + name = "c_test", + command = [ + native.package_name() + "/buckifier/rocks_test_runner.sh", + "$(location :{})".format("c_test_bin"), + ], + type = "simple", +) if not is_opt_mode else None +""") + def register_test(self, test_name, src, @@ -76,5 +120,5 @@ self.total_test = self.total_test + 1 def flush_tests(self): - self.targets_file.write(targets_cfg.unittests_template % self.tests_cfg) + self.targets_file.write(targets_cfg.unittests_template.format(tests=self.tests_cfg).encode("utf-8")) self.tests_cfg = "" diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/targets_cfg.py mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/targets_cfg.py --- 
mariadb-10.11.11/storage/rocksdb/rocksdb/buckifier/targets_cfg.py 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/buckifier/targets_cfg.py 2025-05-19 16:14:27.000000000 +0000 @@ -4,7 +4,9 @@ from __future__ import print_function from __future__ import unicode_literals -rocksdb_target_header = """# This file \100generated by `python buckifier/buckify_rocksdb.py` +rocksdb_target_header_template = \ + """# This file \100generated by: +#$ python3 buckifier/buckify_rocksdb.py{extra_argv} # --> DO NOT EDIT MANUALLY <-- # This file is a Facebook-specific integration for buck builds, so can # only be validated by Facebook employees. @@ -15,7 +17,7 @@ REPO_PATH = package_name() + "/" -ROCKSDB_COMPILER_FLAGS = [ +ROCKSDB_COMPILER_FLAGS_0 = [ "-fno-builtin-memcmp", # Needed to compile in fbcode "-Wno-expansion-to-defined", @@ -30,19 +32,25 @@ ("zlib", None, "z"), ("gflags", None, "gflags"), ("lz4", None, "lz4"), - ("zstd", None), - ("tbb", None), - ("googletest", None, "gtest"), + ("zstd", None, "zstd"), ] -ROCKSDB_OS_DEPS = [ +ROCKSDB_OS_DEPS_0 = [ ( "linux", - ["third-party//numa:numa", "third-party//liburing:uring"], + [ + "third-party//numa:numa", + "third-party//liburing:uring", + "third-party//tbb:tbb", + ], + ), + ( + "macos", + ["third-party//tbb:tbb"], ), ] -ROCKSDB_OS_PREPROCESSOR_FLAGS = [ +ROCKSDB_OS_PREPROCESSOR_FLAGS_0 = [ ( "linux", [ @@ -56,17 +64,33 @@ "-DHAVE_SSE42", "-DLIBURING", "-DNUMA", + "-DROCKSDB_PLATFORM_POSIX", + "-DROCKSDB_LIB_IO_POSIX", + "-DTBB", ], ), ( "macos", - ["-DOS_MACOSX"], + [ + "-DOS_MACOSX", + "-DROCKSDB_PLATFORM_POSIX", + "-DROCKSDB_LIB_IO_POSIX", + "-DTBB", + ], + ), + ( + "windows", + [ + "-DOS_WIN", + "-DWIN32", + "-D_MBCS", + "-DWIN64", + "-DNOMINMAX", + ], ), ] ROCKSDB_PREPROCESSOR_FLAGS = [ - "-DROCKSDB_PLATFORM_POSIX", - "-DROCKSDB_LIB_IO_POSIX", "-DROCKSDB_SUPPORT_THREAD_LOCAL", # Flags to enable libs we include @@ -77,21 +101,22 @@ "-DZSTD", "-DZSTD_STATIC_LINKING_ONLY", "-DGFLAGS=gflags", - "-DTBB", # Added missing flags from output of build_detect_platform "-DROCKSDB_BACKTRACE", +] - # Directories with files for #include - "-I" + REPO_PATH + "include/", - "-I" + REPO_PATH, +# Directories with files for #include +ROCKSDB_INCLUDE_PATHS = [ + "", + "include", ] -ROCKSDB_ARCH_PREPROCESSOR_FLAGS = { +ROCKSDB_ARCH_PREPROCESSOR_FLAGS = {{ "x86_64": [ "-DHAVE_PCLMUL", ], -} +}} build_mode = read_config("fbcode", "build_mode") @@ -99,21 +124,26 @@ # -DNDEBUG is added by default in opt mode in fbcode. But adding it twice # doesn't harm and avoid forgetting to add it. -ROCKSDB_COMPILER_FLAGS += (["-DNDEBUG"] if is_opt_mode else []) +ROCKSDB_COMPILER_FLAGS = ROCKSDB_COMPILER_FLAGS_0 + (["-DNDEBUG"] if is_opt_mode else []) sanitizer = read_config("fbcode", "sanitizer") # Do not enable jemalloc if sanitizer presents. RocksDB will further detect # whether the binary is linked with jemalloc at runtime. 
-ROCKSDB_OS_PREPROCESSOR_FLAGS += ([( +ROCKSDB_OS_PREPROCESSOR_FLAGS = ROCKSDB_OS_PREPROCESSOR_FLAGS_0 + ([( "linux", ["-DROCKSDB_JEMALLOC"], )] if sanitizer == "" else []) -ROCKSDB_OS_DEPS += ([( +ROCKSDB_OS_DEPS = ROCKSDB_OS_DEPS_0 + ([( "linux", ["third-party//jemalloc:headers"], )] if sanitizer == "" else []) + +ROCKSDB_LIB_DEPS = [ + ":rocksdb_lib", + ":rocksdb_test_lib", +] if not is_opt_mode else [":rocksdb_lib"] """ @@ -124,22 +154,41 @@ {headers_attr_prefix}headers = {headers}, arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + link_whole = {link_whole}, os_deps = ROCKSDB_OS_DEPS, os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - deps = [{deps}], - external_deps = ROCKSDB_EXTERNAL_DEPS, + exported_deps = [{deps}], + exported_external_deps = ROCKSDB_EXTERNAL_DEPS{extra_external_deps}, +) +""" + +rocksdb_library_template = """ +cpp_library( + name = "{name}", + srcs = [{srcs}], + {headers_attr_prefix}headers = {headers}, + arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, + compiler_flags = ROCKSDB_COMPILER_FLAGS, + include_paths = ROCKSDB_INCLUDE_PATHS, + os_deps = ROCKSDB_OS_DEPS, + os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, + preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, + exported_deps = ROCKSDB_LIB_DEPS, + exported_external_deps = ROCKSDB_EXTERNAL_DEPS, ) """ binary_template = """ cpp_binary( - name = "%s", - srcs = [%s], + name = "{name}", + srcs = [{srcs}], arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, compiler_flags = ROCKSDB_COMPILER_FLAGS, preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - deps = [%s], + include_paths = ROCKSDB_INCLUDE_PATHS, + deps = [{deps}], external_deps = ROCKSDB_EXTERNAL_DEPS, ) """ @@ -156,24 +205,24 @@ unittests_template = """ # [test_name, test_src, test_type, extra_deps, extra_compiler_flags] ROCKS_TESTS = [ -%s] +{tests}] # Generate a test rule for each entry in ROCKS_TESTS # Do not build the tests in opt mode, since SyncPoint and other test code # will not be included. 
[ - test_binary( - extra_compiler_flags = extra_compiler_flags, - extra_deps = extra_deps, - parallelism = parallelism, - rocksdb_arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, - rocksdb_compiler_flags = ROCKSDB_COMPILER_FLAGS, - rocksdb_external_deps = ROCKSDB_EXTERNAL_DEPS, - rocksdb_os_deps = ROCKSDB_OS_DEPS, - rocksdb_os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, - rocksdb_preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, - test_cc = test_cc, - test_name = test_name, + cpp_unittest( + name = test_name, + srcs = [test_cc], + arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS, + compiler_flags = ROCKSDB_COMPILER_FLAGS + extra_compiler_flags, + include_paths = ROCKSDB_INCLUDE_PATHS, + os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS, + preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS, + deps = [":rocksdb_test_lib"] + extra_deps, + external_deps = ROCKSDB_EXTERNAL_DEPS + [ + ("googletest", None, "gtest"), + ], ) for test_name, test_cc, parallelism, extra_deps, extra_compiler_flags in ROCKS_TESTS if not is_opt_mode diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/build_detect_platform mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/build_detect_platform --- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/build_detect_platform 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/build_detect_platform 2025-05-19 16:14:27.000000000 +0000 @@ -9,6 +9,7 @@ # PLATFORM_LDFLAGS Linker flags # JAVA_LDFLAGS Linker flags for RocksDBJava # JAVA_STATIC_LDFLAGS Linker flags for RocksDBJava static build +# JAVAC_ARGS Arguments for javac # PLATFORM_SHARED_EXT Extension for shared libraries # PLATFORM_SHARED_LDFLAGS Flags for building shared library # PLATFORM_SHARED_CFLAGS Flags for compiling objects for shared library @@ -27,6 +28,7 @@ # -DZSTD if the ZSTD library is present # -DNUMA if the NUMA library is present # -DTBB if the TBB library is present +# -DMEMKIND if the memkind library is present # # Using gflags in rocksdb: # Our project depends on gflags, which requires users to take some extra steps @@ -43,8 +45,13 @@ exit 1 fi -# we depend on C++11 -PLATFORM_CXXFLAGS="-std=c++11" +# we depend on C++11, but should be compatible with newer standards +if [ "$ROCKSDB_CXX_STANDARD" ]; then + PLATFORM_CXXFLAGS="-std=$ROCKSDB_CXX_STANDARD" +else + PLATFORM_CXXFLAGS="-std=c++11" +fi + # we currently depend on POSIX platform COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX" @@ -58,8 +65,12 @@ source "$PWD/build_tools/fbcode_config4.8.1.sh" elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_5xx" ]; then source "$PWD/build_tools/fbcode_config.sh" - else + elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007" ]; then source "$PWD/build_tools/fbcode_config_platform007.sh" + elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM009" ]; then + source "$PWD/build_tools/fbcode_config_platform009.sh" + else + source "$PWD/build_tools/fbcode_config_platform009.sh" fi fi @@ -87,6 +98,16 @@ fi fi +if test -z "$AR"; then + if [ -x "$(command -v gcc-ar)" ]; then + AR=gcc-ar + elif [ -x "$(command -v llvm-ar)" ]; then + AR=llvm-ar + else + AR=ar + fi +fi + # Detect OS if test -z "$TARGET_OS"; then TARGET_OS=`uname -s` @@ -149,10 +170,13 @@ else PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" fi - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" - if test $ROCKSDB_USE_IO_URING; then + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -ldl" + if test -z "$ROCKSDB_USE_IO_URING"; then + ROCKSDB_USE_IO_URING=1 + fi + if test 
"$ROCKSDB_USE_IO_URING" -ne 0; then # check for liburing - $CXX $CFLAGS -x c++ - -luring -o /dev/null 2>/dev/null </dev/null < int main() { struct io_uring ring; @@ -165,9 +189,6 @@ COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_IOURING_PRESENT" fi fi - if test -z "$USE_FOLLY_DISTRIBUTED_MUTEX"; then - USE_FOLLY_DISTRIBUTED_MUTEX=1 - fi # PORT_FILES=port/linux/linux_specific.cc ;; SunOS) @@ -190,6 +211,17 @@ PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread" # PORT_FILES=port/freebsd/freebsd_specific.cc ;; + GNU/kFreeBSD) + PLATFORM=OS_GNU_KFREEBSD + COMMON_FLAGS="$COMMON_FLAGS -DOS_GNU_KFREEBSD" + if [ -z "$USE_CLANG" ]; then + COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp" + else + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic" + fi + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt" + # PORT_FILES=port/gnu_kfreebsd/gnu_kfreebsd_specific.cc + ;; NetBSD) PLATFORM=OS_NETBSD COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" @@ -239,15 +271,20 @@ PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}" JAVA_LDFLAGS="$PLATFORM_LDFLAGS" JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS" +JAVAC_ARGS="-source 7" if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then # Cross-compiling; do not try any compilation tests. # Also don't need any compilation tests if compiling on fbcode + if [ "$FBCODE_BUILD" = "true" ]; then + # Enable backtrace on fbcode since the necessary libraries are present + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" + fi true else if ! test $ROCKSDB_DISABLE_FALLOCATE; then # Test whether fallocate is available - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < #include int main() { @@ -263,7 +300,7 @@ if ! test $ROCKSDB_DISABLE_SNAPPY; then # Test whether Snappy library is installed # http://code.google.com/p/snappy/ - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() {} EOF @@ -278,30 +315,38 @@ # Test whether gflags library is installed # http://gflags.github.io/gflags/ # check if the namespace is gflags - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF + if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF #include + using namespace GFLAGS_NAMESPACE; int main() {} EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" - else - # check if namespace is google - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF + then + COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" + # check if namespace is gflags + elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF + #include + using namespace gflags; + int main() {} +EOF + then + COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=gflags" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" + # check if namespace is google + elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF #include using namespace google; int main() {} EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=google" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" - fi + then + COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=google" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags" fi fi if ! test $ROCKSDB_DISABLE_ZLIB; then # Test whether zlib library is installed - $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() {} EOF @@ -314,7 +359,7 @@ if ! 
test $ROCKSDB_DISABLE_BZIP; then # Test whether bzip library is installed - $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() {} EOF @@ -327,7 +372,7 @@ if ! test $ROCKSDB_DISABLE_LZ4; then # Test whether lz4 library is installed - $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < #include int main() {} @@ -341,7 +386,7 @@ if ! test $ROCKSDB_DISABLE_ZSTD; then # Test whether zstd library is installed - $CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() {} EOF @@ -354,7 +399,7 @@ if ! test $ROCKSDB_DISABLE_NUMA; then # Test whether numa is available - $CXX $CFLAGS -x c++ - -o /dev/null -lnuma 2>/dev/null </dev/null < #include int main() {} @@ -368,7 +413,7 @@ if ! test $ROCKSDB_DISABLE_TBB; then # Test whether tbb is available - $CXX $CFLAGS $LDFLAGS -x c++ - -o /dev/null -ltbb 2>/dev/null </dev/null < int main() {} EOF @@ -381,7 +426,7 @@ if ! test $ROCKSDB_DISABLE_JEMALLOC; then # Test whether jemalloc is available - if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null -ljemalloc \ + if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -ljemalloc \ 2>/dev/null; then # This will enable some preprocessor identifiers in the Makefile JEMALLOC=1 @@ -402,7 +447,7 @@ fi if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then # jemalloc is not available. Let's try tcmalloc - if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null \ + if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o \ -ltcmalloc 2>/dev/null; then PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltcmalloc" JAVA_LDFLAGS="$JAVA_LDFLAGS -ltcmalloc" @@ -411,7 +456,7 @@ if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then # Test whether malloc_usable_size is available - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() { size_t res = malloc_usable_size(0); @@ -424,9 +469,25 @@ fi fi + if ! test $ROCKSDB_DISABLE_MEMKIND; then + # Test whether memkind library is installed + $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -lmemkind -x c++ - -o test.o 2>/dev/null < + int main() { + memkind_malloc(MEMKIND_DAX_KMEM, 1024); + return 0; + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DMEMKIND" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lmemkind" + JAVA_LDFLAGS="$JAVA_LDFLAGS -lmemkind" + fi + fi + if ! test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then # Test whether PTHREAD_MUTEX_ADAPTIVE_NP mutex type is available - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() { int x = PTHREAD_MUTEX_ADAPTIVE_NP; @@ -441,7 +502,7 @@ if ! test $ROCKSDB_DISABLE_BACKTRACE; then # Test whether backtrace is available - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() { void* frames[1]; @@ -453,7 +514,7 @@ COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE" else # Test whether execinfo library is installed - $CXX $CFLAGS -lexecinfo -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() { void* frames[1]; @@ -470,7 +531,7 @@ if ! test $ROCKSDB_DISABLE_PG; then # Test if -pg is supported - $CXX $CFLAGS -pg -x c++ - -o /dev/null 2>/dev/null </dev/null </dev/null </dev/null < int main() { int fd = open("/dev/null", 0); @@ -496,7 +557,7 @@ if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then # Test whether sched_getcpu is supported - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < int main() { int cpuid = sched_getcpu(); @@ -508,9 +569,23 @@ fi fi + if ! 
test $ROCKSDB_DISABLE_AUXV_GETAUXVAL; then + # Test whether getauxval is supported + $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null < + int main() { + uint64_t auxv = getauxval(AT_HWCAP); + (void)auxv; + } +EOF + if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_AUXV_GETAUXVAL_PRESENT" + fi + fi + if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then # Test whether c++17 aligned-new is supported - $CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o /dev/null 2>/dev/null </dev/null </dev/null < + int main() {} +EOF + if [ "$?" = 0 ]; then + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbenchmark" + fi + fi fi # TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning. -# -Wshorten-64-to-32 breaks compilation on FreeBSD i386 -if ! [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then +# -Wshorten-64-to-32 breaks compilation on FreeBSD aarch64 and i386 +if ! { [ "$TARGET_OS" = FreeBSD ] && [ "$TARGET_ARCHITECTURE" = arm64 -o "$TARGET_ARCHITECTURE" = i386 ]; }; then # Test whether -Wshorten-64-to-32 is available - $CXX $CFLAGS -x c++ - -o /dev/null -Wshorten-64-to-32 2>/dev/null </dev/null </dev/null; then + COMMON_FLAGS="$COMMON_FLAGS -march=native " + else + COMMON_FLAGS="$COMMON_FLAGS -march=z196 " + fi + COMMON_FLAGS="$COMMON_FLAGS" elif [ "$TARGET_OS" == "IOS" ]; then COMMON_FLAGS="$COMMON_FLAGS" elif [ "$TARGET_OS" == "AIX" ] || [ "$TARGET_OS" == "SunOS" ]; then @@ -575,6 +666,40 @@ if test "$USE_SSE"; then TRY_SSE_ETC="1" fi + + if test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then + COMMON_FLAGS="$COMMON_FLAGS -march=z196 " + fi + + if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then + # For portability compile for macOS 10.12 (2016) or newer + COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.12" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.12" + # -mmacosx-version-min must come first here. + PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.12 $PLATFORM_SHARED_LDFLAGS" + PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.12" + JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.12" + JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" + JAVA_STATIC_DEPS_CCFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" + JAVA_STATIC_DEPS_CXXFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" + fi +fi + +if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then + # check for GNU libc on ppc64 + $CXX -x c++ - -o /dev/null 2>/dev/null < + #include + #include + + int main(int argc, char *argv[]) { + printf("GNU libc version: %s\n", gnu_get_libc_version()); + return 0; + } +EOF + if [ "$?" != 0 ]; then + PPC_LIBC_IS_GNU=0 + fi fi if test "$TRY_SSE_ETC"; then @@ -584,14 +709,21 @@ # It doesn't even really check that your current CPU is compatible. # # SSE4.2 available since nehalem, ca. 2008-2010 + # Includes POPCNT for BitsSetToOne, BitParity TRY_SSE42="-msse4.2" # PCLMUL available since westmere, ca. 2010-2011 TRY_PCLMUL="-mpclmul" # AVX2 available since haswell, ca. 2013-2015 TRY_AVX2="-mavx2" + # BMI available since haswell, ca. 2013-2015 + # Primarily for TZCNT for CountTrailingZeroBits + TRY_BMI="-mbmi" + # LZCNT available since haswell, ca. 
2013-2015 + # For FloorLog2 + TRY_LZCNT="-mlzcnt" fi -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null </dev/null < #include int main() { @@ -605,7 +737,7 @@ echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" >&2 fi -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o /dev/null 2>/dev/null </dev/null < #include int main() { @@ -622,7 +754,7 @@ echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling" >&2 fi -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o /dev/null 2>/dev/null </dev/null < #include int main() { @@ -637,7 +769,35 @@ echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2 fi -$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null < + #include + int main(int argc, char *argv[]) { + (void)argv; + return (int)_tzcnt_u64((uint64_t)argc); + } +EOF +if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS $TRY_BMI -DHAVE_BMI" +elif test "$USE_SSE"; then + echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2 +fi + +$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o test.o 2>/dev/null < + #include + int main(int argc, char *argv[]) { + (void)argv; + return (int)_lzcnt_u64((uint64_t)argc); + } +EOF +if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS $TRY_LZCNT -DHAVE_LZCNT" +elif test "$USE_SSE"; then + echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2 +fi + +$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null < int main() { uint64_t a = 0xffffFFFFffffFFFF; @@ -654,7 +814,7 @@ # succeed because the cross-compiler flags are added by the Makefile, not this # script. if [ "$PLATFORM" != IOS ]; then - $CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null </dev/null </dev/null </dev/null + $CXX $COMMON_FLAGS $PLATFORM_SHARED_LDFLAGS test_dl.o -o test.o 2>/dev/null if [ "$?" = 0 ]; then EXEC_LDFLAGS+="-ldl" rm -f test_dl.o @@ -681,6 +842,20 @@ fi fi +# check for F_FULLFSYNC +$CXX $PLATFORM_CXXFALGS -x c++ - -o test.o 2>/dev/null < + int main() { + fcntl(0, F_FULLFSYNC); + return 0; + } +EOF +if [ "$?" 
= 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC" +fi + +rm -f test.o test_dl.o + PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS" PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS" @@ -692,10 +867,16 @@ echo "CC=$CC" >> "$OUTPUT" echo "CXX=$CXX" >> "$OUTPUT" +echo "AR=$AR" >> "$OUTPUT" echo "PLATFORM=$PLATFORM" >> "$OUTPUT" echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> "$OUTPUT" +echo "PLATFORM_CMAKE_FLAGS=$PLATFORM_CMAKE_FLAGS" >> "$OUTPUT" echo "JAVA_LDFLAGS=$JAVA_LDFLAGS" >> "$OUTPUT" echo "JAVA_STATIC_LDFLAGS=$JAVA_STATIC_LDFLAGS" >> "$OUTPUT" +echo "JAVA_STATIC_DEPS_CCFLAGS=$JAVA_STATIC_DEPS_CCFLAGS" >> "$OUTPUT" +echo "JAVA_STATIC_DEPS_CXXFLAGS=$JAVA_STATIC_DEPS_CXXFLAGS" >> "$OUTPUT" +echo "JAVA_STATIC_DEPS_LDFLAGS=$JAVA_STATIC_DEPS_LDFLAGS" >> "$OUTPUT" +echo "JAVAC_ARGS=$JAVAC_ARGS" >> "$OUTPUT" echo "VALGRIND_VER=$VALGRIND_VER" >> "$OUTPUT" echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> "$OUTPUT" echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> "$OUTPUT" @@ -728,3 +909,6 @@ if test -n "$USE_FOLLY_DISTRIBUTED_MUTEX"; then echo "USE_FOLLY_DISTRIBUTED_MUTEX=$USE_FOLLY_DISTRIBUTED_MUTEX" >> "$OUTPUT" fi +if test -n "$PPC_LIBC_IS_GNU"; then + echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU" >> "$OUTPUT" +fi diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/check-sources.sh mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/check-sources.sh --- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/check-sources.sh 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/check-sources.sh 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# +# Check for some simple mistakes that should prevent commit or push + +BAD="" + +git grep 'namespace rocksdb' -- '*.[ch]*' +if [ "$?" != "1" ]; then + echo "^^^^^ Do not hardcode namespace rocksdb. Use ROCKSDB_NAMESPACE" + BAD=1 +fi + +git grep -i 'nocommit' -- ':!build_tools/check-sources.sh' +if [ "$?" != "1" ]; then + echo "^^^^^ Code was not intended to be committed" + BAD=1 +fi + +git grep ' /dev/null -then - echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!" - echo "You can download clang-format-diff.py by running: " - echo " curl --location http://goo.gl/iUW1u2 -o ${CLANG_FORMAT_DIFF}" - echo "You can download clang-format by running:" - echo " brew install clang-format" - echo " Or" - echo " apt install clang-format" - echo " This might work too:" - echo " yum install git-clang-format" - echo "Then, move both files (i.e. ${CLANG_FORMAT_DIFF} and clang-format) to some directory within PATH=${PATH}" - echo "and make sure ${CLANG_FORMAT_DIFF} is executable." - exit 128 -fi - -# Check argparse, a library that clang-format-diff.py requires. -python 2>/dev/null << EOF -import argparse -EOF - -if [ "$?" != 0 ] -then - echo "To run clang-format-diff.py, we'll need the library "argparse" to be" - echo "installed. You can try either of the follow ways to install it:" - echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse" - echo " 2. easy_install argparse (if you have easy_install)" - echo " 3. pip install argparse (if you have pip)" - exit 129 +print_usage () { + echo "Usage:" + echo "format-diff.sh [OPTIONS]" + echo "-c: check only." + echo "-h: print this message." +} + +while getopts ':ch' OPTION; do + case "$OPTION" in + c) + CHECK_ONLY=1 + ;; + h) + print_usage + exit 1 + ;; + ?) 
+ print_usage + exit 1 + ;; + esac +done + +REPO_ROOT="$(git rev-parse --show-toplevel)" + +if [ "$CLANG_FORMAT_DIFF" ]; then + echo "Note: CLANG_FORMAT_DIFF='$CLANG_FORMAT_DIFF'" + # Dry run to confirm dependencies like argparse + if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then + true #Good + else + exit 128 + fi +else + # First try directly executing the possibilities + if clang-format-diff --help &> /dev/null < /dev/null; then + CLANG_FORMAT_DIFF=clang-format-diff + elif clang-format-diff.py --help &> /dev/null < /dev/null; then + CLANG_FORMAT_DIFF=clang-format-diff.py + elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then + CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py + else + # This probably means we need to directly invoke the interpreter. + # But first find clang-format-diff.py + if [ -f "$REPO_ROOT/clang-format-diff.py" ]; then + CFD_PATH="$REPO_ROOT/clang-format-diff.py" + elif which clang-format-diff.py &> /dev/null; then + CFD_PATH="$(which clang-format-diff.py)" + else + echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!" + echo "You can download clang-format-diff.py by running: " + echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py" + echo "You should make sure the downloaded script is not compromised." + echo "You can download clang-format by running:" + echo " brew install clang-format" + echo " Or" + echo " apt install clang-format" + echo " This might work too:" + echo " yum install git-clang-format" + echo "Then make sure clang-format is available and executable from \$PATH:" + echo " clang-format --version" + exit 128 + fi + # Check argparse pre-req on interpreter, or it will fail + if echo import argparse | ${PYTHON:-python3}; then + true # Good + else + echo "To run clang-format-diff.py, we'll need the library "argparse" to be" + echo "installed. You can try either of the follow ways to install it:" + echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse" + echo " 2. easy_install argparse (if you have easy_install)" + echo " 3. pip install argparse (if you have pip)" + exit 129 + fi + # Unfortunately, some machines have a Python2 clang-format-diff.py + # installed but only a Python3 interpreter installed. Unfortunately, + # automatic 2to3 migration is insufficient, so suggest downloading latest. + if grep -q "print '" "$CFD_PATH" && \ + ${PYTHON:-python3} --version | grep -q 'ython 3'; then + echo "You have clang-format-diff.py for Python 2 but are using a Python 3" + echo "interpreter (${PYTHON:-python3})." + echo "You can download clang-format-diff.py for Python 3 by running: " + echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py" + echo "You should make sure the downloaded script is not compromised." + exit 130 + fi + CLANG_FORMAT_DIFF="${PYTHON:-python3} $CFD_PATH" + # This had better work after all those checks + if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then + true #Good + else + exit 128 + fi + fi fi # TODO(kailiu) following work is not complete since we still need to figure @@ -62,31 +122,41 @@ # If there's no uncommitted changes, we assume user are doing post-commit # format check, in which case we'll try to check the modified lines vs. the -# facebook/rocksdb.git master branch. 
Otherwise, we'll check format of the +# facebook/rocksdb.git main branch. Otherwise, we'll check format of the # uncommitted code only. if [ -z "$uncommitted_code" ] then # Attempt to get name of facebook/rocksdb.git remote. - [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)" + [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(LC_ALL=POSIX LANG=POSIX git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)" # Fall back on 'origin' if that fails [ "$FORMAT_REMOTE" ] || FORMAT_REMOTE=origin - # Use master branch from that remote - [ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/master" + # Use main branch from that remote + [ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/$(LC_ALL=POSIX LANG=POSIX git remote show $FORMAT_REMOTE | sed -n '/HEAD branch/s/.*: //p')" # Get the common ancestor with that remote branch. Everything after that # common ancestor would be considered the contents of a pull request, so # should be relevant for formatting fixes. FORMAT_UPSTREAM_MERGE_BASE="$(git merge-base "$FORMAT_UPSTREAM" HEAD)" # Get the differences diffs=$(git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -p 1) + echo "Checking format of changes not yet in $FORMAT_UPSTREAM..." else # Check the format of uncommitted lines, diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1) + echo "Checking format of uncommitted changes..." fi if [ -z "$diffs" ] then echo "Nothing needs to be reformatted!" exit 0 +elif [ $CHECK_ONLY ] +then + echo "Your change has unformatted code. Please run make format!" + if [ $VERBOSE_CHECK ]; then + clang-format --version + echo "$diffs" + fi + exit 1 fi # Highlight the insertion/deletion from the clang-format-diff.py's output @@ -121,7 +191,7 @@ then git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -i -p 1 else - git diff -U0 HEAD^ | $CLANG_FORMAT_DIFF -i -p 1 + git diff -U0 HEAD | $CLANG_FORMAT_DIFF -i -p 1 fi echo "Files reformatted!" diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/gnu_parallel mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/gnu_parallel --- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/gnu_parallel 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/gnu_parallel 2025-05-19 16:14:27.000000000 +0000 @@ -1561,6 +1561,7 @@ ::die_bug("Can't dup STDERR: $!"); open $Global::original_stdin, "<&", "STDIN" or ::die_bug("Can't dup STDIN: $!"); + $Global::is_terminal = (-t $Global::original_stderr); } sub enough_file_handles { @@ -1840,12 +1841,17 @@ } } +$opt::min_progress_interval = 0; + sub init_progress { # Uses: # $opt::bar # Returns: # list of computers for progress output $|=1; + if (not $Global::is_terminal) { + $opt::min_progress_interval = 30; + } if($opt::bar) { return("",""); } @@ -1870,6 +1876,9 @@ } my $last_header=""; my $sleep = 0.2; + my $last_left = 1000000000; + my $last_progress_time = 0; + my $ps_reported = 0; do { while($Global::total_running > 0) { debug($Global::total_running, "==", scalar @@ -1880,14 +1889,38 @@ close $job->fh(0,"w"); } } - if($opt::progress) { + # When not connected to terminal, assume CI (e.g. CircleCI). In + # that case we want occasional progress output to prevent abort + # due to timeout with no output, but we also need to stop sending + # progress output if there has been no actual progress, so that + # the job can time out appropriately (CirecleCI: 10m) in case of + # a hung test. 
But without special output, it is extremely + # annoying to diagnose which test is hung, so we add that using + # `ps` below. + if($opt::progress and + ($Global::is_terminal or (time() - $last_progress_time) >= 30)) { my %progress = progress(); if($last_header ne $progress{'header'}) { print $Global::original_stderr "\n", $progress{'header'}, "\n"; $last_header = $progress{'header'}; } - print $Global::original_stderr "\r",$progress{'status'}; - flush $Global::original_stderr; + if ($Global::is_terminal) { + print $Global::original_stderr "\r",$progress{'status'}; + } + if ($last_left > $Global::left) { + if (not $Global::is_terminal) { + print $Global::original_stderr $progress{'status'},"\n"; + } + $last_progress_time = time(); + $ps_reported = 0; + } elsif (not $ps_reported and (time() - $last_progress_time) >= 60) { + # No progress in at least 60 seconds: run ps + print $Global::original_stderr "\n"; + system("ps", "-wf"); + $ps_reported = 1; + } + $last_left = $Global::left; + flush $Global::original_stderr; } if($Global::total_running < $Global::max_jobs_running and not $Global::JobQueue->empty()) { @@ -1921,7 +1954,7 @@ not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); if($opt::progress) { my %progress = progress(); - print $Global::original_stderr "\r", $progress{'status'}, "\n"; + print $Global::original_stderr $opt::progress_sep, $progress{'status'}, "\n"; flush $Global::original_stderr; } } @@ -1954,10 +1987,11 @@ my $eta = ""; my ($status,$header)=("",""); if($opt::eta) { - my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = - compute_eta(); - $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", - $this_eta, $left, $avgtime); + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + $Global::left = $left; } my $termcols = terminal_columns(); my @workers = sort keys %Global::host; @@ -5801,7 +5835,7 @@ . "-" . $self->seq(); } else { $workdir = $opt::workdir; - # Rsync treats /./ special. We dont want that + # Rsync treats /./ special. We don't want that $workdir =~ s:/\./:/:g; # Remove /./ $workdir =~ s:/+$::; # Remove ending / if any $workdir =~ s:^\./::g; # Remove starting ./ if any diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/make_package.sh mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/make_package.sh --- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/make_package.sh 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/make_package.sh 2025-05-19 16:14:27.000000000 +0000 @@ -103,31 +103,26 @@ gem_install fpm make static_lib - make install INSTALL_PATH=package - - cd package - - LIB_DIR=lib - if [[ -z "$ARCH" ]]; then - ARCH=$(getconf LONG_BIT) - fi - if [[ ("$FPM_OUTPUT" = "rpm") && ($ARCH -eq 64) ]]; then - mv lib lib64 - LIB_DIR=lib64 + LIBDIR=/usr/lib + if [[ $FPM_OUTPUT = "rpm" ]]; then + LIBDIR=$(rpm --eval '%_libdir') fi + rm -rf package + make install DESTDIR=package PREFIX=/usr LIBDIR=$LIBDIR + fpm \ -s dir \ -t $FPM_OUTPUT \ + -C package \ -n rocksdb \ -v $1 \ - --prefix /usr \ --url http://rocksdb.org/ \ -m rocksdb@fb.com \ --license BSD \ --vendor Facebook \ --description "RocksDB is an embeddable persistent key-value store for fast storage." 
\
-    include $LIB_DIR
+    usr
 }
 
 # shellcheck disable=SC2068
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/regression_build_test.sh mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/regression_build_test.sh
--- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/regression_build_test.sh	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/regression_build_test.sh	2025-05-19 16:14:27.000000000 +0000
@@ -20,26 +20,11 @@
 function cleanup {
   rm -rf $DATA_DIR
-  rm -f $STAT_FILE.fillseq
-  rm -f $STAT_FILE.readrandom
-  rm -f $STAT_FILE.overwrite
-  rm -f $STAT_FILE.memtablefillreadrandom
+  rm -f $STAT_FILE.*
 }
 
 trap cleanup EXIT
 
-if [ -z $GIT_BRANCH ]; then
-  git_br=`git rev-parse --abbrev-ref HEAD`
-else
-  git_br=$(basename $GIT_BRANCH)
-fi
-
-if [ $git_br == "master" ]; then
-  git_br=""
-else
-  git_br="."$git_br
-fi
-
 make release
 
 # measure fillseq + fill up the DB for overwrite benchmark
@@ -286,12 +271,10 @@
       --sync=0 \
       --verify_checksum=1 \
       --delete_obsolete_files_period_micros=314572800 \
-      --max_grandparent_overlap_factor=10 \
       --use_plain_table=1 \
       --open_files=-1 \
       --mmap_read=1 \
       --mmap_write=0 \
-      --memtablerep=prefix_hash \
       --bloom_bits=10 \
       --bloom_locality=1 \
       --perf_level=0"
@@ -378,7 +361,7 @@
     echo >&2 "ERROR: Key $key doesn't have a value."
     return
   fi
-  curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \
+  curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \
     --connect-timeout 60
 }
 
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/rocksdb-lego-determinator mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/rocksdb-lego-determinator
--- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/rocksdb-lego-determinator	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/rocksdb-lego-determinator	2025-05-19 16:14:27.000000000 +0000
@@ -3,7 +3,7 @@
 # to determine next steps to run
 
 # Usage:
-# EMAIL=<email> ONCALL=<email> TRIGGER=<trigger> SUBSCRIBER=<email> rocks_ci.py <test arg>
+# EMAIL=<email> ONCALL=<email> TRIGGER=<trigger> SUBSCRIBER=<email> WORKINGDIR=<working directory> rocksdb-lego-determinator <test arg>
 #
 # Input            Value
 # -------------------------------------------------------------------------
 # ONCALL           Email address to raise a task on failure
 # TRIGGER          Trigger conditions for email. Valid values are fail, warn, all
 # SUBSCRIBER       Email addresss to add as subscriber for task
-#
+# WORKINGDIR       Working directory
 #
 
 # Report configuration
@@ -24,22 +24,22 @@
   REPORT_EMAIL="
   {
-    'type':'email',
-    'triggers': [ '$TRIGGER' ],
-    'emails':['$EMAIL']
-  },"
+    \"type\":\"email\",
+    \"triggers\": [ \"$TRIGGER\" ],
+    \"emails\":[\"$EMAIL\"]
+  }"
 fi
 
 CREATE_TASK=
 if [ ! -z $ONCALL ]; then
   CREATE_TASK="
   {
-    'type':'task',
-    'triggers':[ 'fail' ],
-    'priority':0,
-    'subscribers':[ '$SUBSCRIBER' ],
-    'tags':[ 'rocksdb', 'ci' ],
-  },"
+    \"type\":\"task\",
+    \"triggers\":[ \"fail\" ],
+    \"priority\":0,
+    \"subscribers\":[ \"$SUBSCRIBER\" ],
+    \"tags\":[ \"rocksdb\", \"ci\" ]
+  }"
 fi
 
 # For now, create the tasks using only the dedicated task creation tool.
@@ -47,47 +47,54 @@
 
 REPORT=
 if [[ ! -z $REPORT_EMAIL || ! -z $CREATE_TASK ]]; then
-  REPORT="'report': [
-    $REPORT_EMAIL
+  REPORT=",\"report\": [
+    $REPORT_EMAIL,
     $CREATE_TASK
   ]"
 fi
 
+# Working directory for the following command, default to current directory
+WORKING_DIR=.
+if [ !
-z $WORKINGDIR ]; then + WORKING_DIR=$WORKINGDIR +fi + # # Helper variables # CLEANUP_ENV=" { - 'name':'Cleanup environment', - 'shell':'rm -rf /dev/shm/rocksdb && mkdir /dev/shm/rocksdb && (chmod +t /dev/shm || true) && make clean', - 'user':'root' + \"name\":\"Cleanup environment\", + \"shell\":\"cd $WORKING_DIR; rm -rf /dev/shm/rocksdb && mkdir /dev/shm/rocksdb && (chmod +t /dev/shm || true) && make clean\", + \"user\":\"root\" }" UPLOAD_DB_DIR=" { - 'name':'Upload database directory', - 'shell':'tar -cvzf rocksdb_db.tar.gz /dev/shm/rocksdb/', - 'user':'root', - 'cleanup':true, - 'provide_artifacts': [ - { - 'name':'rocksdb_db_dir', - 'paths': ['rocksdb_db.tar.gz'], - 'bundle': false, - }, - ], + \"name\":\"Upload database directory\", + \"shell\":\"tar -cvzf rocksdb_db.tar.gz /dev/shm/rocksdb/\", + \"user\":\"root\", + \"cleanup\":true, + \"provide_artifacts\": [ + { + \"name\":\"rocksdb_db_dir\", + \"paths\": [\"rocksdb_db.tar.gz\"], + \"bundle\": false + } + ] }" -# We will eventually set the RATIO to 1, but we want do this -# in steps. RATIO=$(nproc) will make it work as J=1 +# set default RATIO to 1, which sets J=$(nproc) and j=$(nproc) if [ -z $RATIO ]; then - RATIO=$(nproc) + RATIO=1 fi +# Should probably be called PARALLEL_TEST if [ -z $PARALLEL_J ]; then PARALLEL_J="J=$(expr $(nproc) / ${RATIO})" fi +# Should probably be called PARALLEL_MAKE if [ -z $PARALLEL_j ]; then PARALLEL_j="-j$(expr $(nproc) / ${RATIO})" fi @@ -100,18 +107,18 @@ GCC_481="ROCKSDB_FBCODE_BUILD_WITH_481=1" ASAN="COMPILE_WITH_ASAN=1" CLANG="USE_CLANG=1" -# in gcc-5 there are known problems with TSAN like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71090. -# using platform007 gives us gcc-8 or higher which has that bug fixed. -TSAN="ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007=1 COMPILE_WITH_TSAN=1" +TSAN="COMPILE_WITH_TSAN=1" UBSAN="COMPILE_WITH_UBSAN=1" -TSAN_CRASH='CRASH_TEST_EXT_ARGS="--compression_type=zstd --log2_keys_per_lock=22"' +ASAN_CRASH="ASAN_OPTIONS=disable_coredump=0" +TSAN_CRASH="CRASH_TEST_EXT_ARGS=\\\"--compression_type=zstd --log2_keys_per_lock=22\\\"" NON_TSAN_CRASH="CRASH_TEST_EXT_ARGS=--compression_type=zstd" DISABLE_JEMALLOC="DISABLE_JEMALLOC=1" HTTP_PROXY="https_proxy=http://fwdproxy.29.prn1:8080 http_proxy=http://fwdproxy.29.prn1:8080 ftp_proxy=http://fwdproxy.29.prn1:8080" SETUP_JAVA_ENV="export $HTTP_PROXY; export JAVA_HOME=/usr/local/jdk-8u60-64/; export PATH=\$JAVA_HOME/bin:\$PATH" -PARSER="'parser':'python build_tools/error_filter.py $1'" +PARSER="\"parser\":\"/usr/bin/env python3 build_tools/error_filter.py $1\"" CONTRUN_NAME="ROCKSDB_CONTRUN_NAME" +SKIP_FORMAT_CHECKS="SKIP_FORMAT_BUCK_CHECKS=1" # This code is getting called under various scenarios. What we care about is to # understand when it's called from nightly contruns because in that case we'll @@ -129,15 +136,15 @@ # DISABLE_COMMANDS="[ { - 'name':'Disable test', - 'oncall':'$ONCALL', - 'steps': [ - { - 'name':'Job disabled. Please contact test owner', - 'shell':'exit 1', - 'user':'root' - }, - ], + \"name\":\"Disable test\", + \"oncall\":\"$ONCALL\", + \"steps\": [ + { + \"name\":\"Job disabled. 
Please contact test owner\", + \"shell\":\"exit 1\", + \"user\":\"root\" + } + ] } ]" @@ -146,18 +153,18 @@ # UNIT_TEST_COMMANDS="[ { - 'name':'Rocksdb Unit Test', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Unit Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and test RocksDB debug version', - 'shell':'$SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=check $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and test RocksDB debug version\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $SKIP_FORMAT_CHECKS make $PARALLELISM check || $CONTRUN_NAME=check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -167,20 +174,20 @@ # UNIT_TEST_NON_SHM_COMMANDS="[ { - 'name':'Rocksdb Unit Test', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Unit Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and test RocksDB debug version', - 'timeout': 86400, - 'shell':'$NON_SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=non_shm_check $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and test RocksDB debug version\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $NON_SHM $DEBUG $SKIP_FORMAT_CHECKS make $PARALLELISM check || $CONTRUN_NAME=non_shm_check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -190,18 +197,18 @@ # RELEASE_BUILD_COMMANDS="[ { - 'name':'Rocksdb Release Build', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Release Build\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build RocksDB release', - 'shell':'make $PARALLEL_j release || $CONTRUN_NAME=release $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build RocksDB release\", + \"shell\":\"cd $WORKING_DIR; make $PARALLEL_j release || $CONTRUN_NAME=release $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -211,18 +218,18 @@ # UNIT_TEST_COMMANDS_481="[ { - 'name':'Rocksdb Unit Test on GCC 4.8.1', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Unit Test on GCC 4.8.1\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and test RocksDB debug version', - 'shell':'$SHM $GCC_481 $DEBUG make $PARALLELISM check || $CONTRUN_NAME=unit_gcc_481_check $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and test RocksDB debug version\", + \"shell\":\"cd $WORKING_DIR; $SHM $GCC_481 $DEBUG $SKIP_FORMAT_CHECKS make $PARALLELISM check || $CONTRUN_NAME=unit_gcc_481_check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -232,18 +239,18 @@ # RELEASE_BUILD_COMMANDS_481="[ { - 'name':'Rocksdb Release on GCC 4.8.1', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Release on GCC 4.8.1\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build RocksDB release on GCC 4.8.1', - 'shell':'$GCC_481 make $PARALLEL_j release || $CONTRUN_NAME=release_gcc481 $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build RocksDB release on GCC 4.8.1\", + \"shell\":\"cd $WORKING_DIR; $GCC_481 make $PARALLEL_j release || $CONTRUN_NAME=release_gcc481 $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT 
} ]" @@ -253,18 +260,18 @@ # CLANG_UNIT_TEST_COMMANDS="[ { - 'name':'Rocksdb Unit Test', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Unit Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and test RocksDB debug', - 'shell':'$CLANG $SHM $DEBUG make $PARALLELISM check || $CONTRUN_NAME=clang_check $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and test RocksDB debug\", + \"shell\":\"cd $WORKING_DIR; $CLANG $SHM $DEBUG $SKIP_FORMAT_CHECKS make $PARALLELISM check || $CONTRUN_NAME=clang_check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -274,18 +281,18 @@ # CLANG_RELEASE_BUILD_COMMANDS="[ { - 'name':'Rocksdb CLANG Release Build', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb CLANG Release Build\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build RocksDB release', - 'shell':'$CLANG make $PARALLEL_j release|| $CONTRUN_NAME=clang_release $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build RocksDB release\", + \"shell\":\"cd $WORKING_DIR; $CLANG make $PARALLEL_j release|| $CONTRUN_NAME=clang_release $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -295,18 +302,18 @@ # CLANG_ANALYZE_COMMANDS="[ { - 'name':'Rocksdb analyze', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb analyze\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'RocksDB build and analyze', - 'shell':'$CLANG $SHM $DEBUG make $PARALLEL_j analyze || $CONTRUN_NAME=clang_analyze $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"RocksDB build and analyze\", + \"shell\":\"cd $WORKING_DIR; $CLANG $SHM $DEBUG make $PARALLEL_j analyze || $CONTRUN_NAME=clang_analyze $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -316,18 +323,18 @@ # CODE_COV_COMMANDS="[ { - 'name':'Rocksdb Unit Test Code Coverage', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Unit Test Code Coverage\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build, test and collect code coverage info', - 'shell':'$SHM $DEBUG make $PARALLELISM coverage || $CONTRUN_NAME=coverage $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build, test and collect code coverage info\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $SKIP_FORMAT_CHECKS make $PARALLELISM coverage || $CONTRUN_NAME=coverage $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -337,18 +344,18 @@ # UNITY_COMMANDS="[ { - 'name':'Rocksdb Unity', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Unity\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build, test unity test', - 'shell':'$SHM $DEBUG V=1 make J=1 unity_test || $CONTRUN_NAME=unity_test $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build, test unity test\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG V=1 make $PARALLELISM unity_test || $CONTRUN_NAME=unity_test $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -358,65 +365,108 @@ # LITE_BUILD_COMMANDS="[ { - 'name':'Rocksdb Lite build', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Lite build\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": 
\"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build RocksDB debug version', - 'shell':'make J=1 LITE=1 all check || $CONTRUN_NAME=lite $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build RocksDB debug version\", + \"shell\":\"cd $WORKING_DIR; $SKIP_FORMAT_CHECKS make $PARALLELISM LITE=1 all check || $CONTRUN_NAME=lite $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + } + ] + $REPORT + } +]" + +# +# RocksDB stress/crash test +# +STRESS_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb Stress and Crash Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Build and run RocksDB debug stress tests\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - ], + { + \"name\":\"Build and run RocksDB debug crash tests\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM crash_test || $CONTRUN_NAME=crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] $REPORT } ]" # -# Report RocksDB lite binary size to scuba -REPORT_LITE_BINARY_SIZE_COMMANDS="[ +# RocksDB blackbox stress/crash test +# +BLACKBOX_STRESS_CRASH_TEST_COMMANDS="[ { - 'name':'Rocksdb Lite Binary Size', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Blackbox Stress and Crash Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Report RocksDB Lite binary size to scuba', - 'shell':'tools/report_lite_binary_size.sh', - 'user':'root', + \"name\":\"Build and run RocksDB debug stress tests\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER }, - ], + { + \"name\":\"Build and run RocksDB debug blackbox crash tests\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM blackbox_crash_test || $CONTRUN_NAME=blackbox_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] + $REPORT + } ]" # -# RocksDB stress/crash test +# RocksDB whitebox stress/crash test # -STRESS_CRASH_TEST_COMMANDS="[ +WHITEBOX_STRESS_CRASH_TEST_COMMANDS="[ { - 'name':'Rocksdb Stress and Crash Test', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Whitebox Stress and Crash Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug stress tests', - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug stress tests\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, { - 'name':'Build and run RocksDB debug crash tests', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test || $CONTRUN_NAME=crash_test $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug whitebox crash tests\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM whitebox_crash_test || $CONTRUN_NAME=whitebox_crash_test $TASK_CREATION_TOOL\", + 
\"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -426,27 +476,27 @@ # STRESS_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { - 'name':'Rocksdb Stress and Crash Test with atomic flush', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Stress and Crash Test with atomic flush\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug stress tests', - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug stress tests\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, { - 'name':'Build and run RocksDB debug crash tests with atomic flush', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test_with_atomic_flush || $CONTRUN_NAME=crash_test_with_atomic_flush $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug crash tests with atomic flush\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM crash_test_with_atomic_flush || $CONTRUN_NAME=crash_test_with_atomic_flush $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -456,27 +506,57 @@ # STRESS_CRASH_TEST_WITH_TXN_COMMANDS="[ { - 'name':'Rocksdb Stress and Crash Test with txn', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Stress and Crash Test with txn\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug stress tests', - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug stress tests\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, { - 'name':'Build and run RocksDB debug crash tests with txn', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 crash_test_with_txn || $CONTRUN_NAME=crash_test_with_txn $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug crash tests with txn\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM crash_test_with_txn || $CONTRUN_NAME=crash_test_with_txn $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] + $REPORT + } +]" + +# +# RocksDB stress/crash test with timestamp +# +STRESS_CRASH_TEST_WITH_TS_COMMANDS="[ + { + \"name\":\"Rocksdb Stress and Crash Test with ts\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Build and run RocksDB debug stress tests\", + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM db_stress || $CONTRUN_NAME=db_stress $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + { + \"name\":\"Build and run RocksDB debug crash tests with ts\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH make $PARALLELISM crash_test_with_ts || $CONTRUN_NAME=crash_test_with_ts 
$TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -486,19 +566,19 @@ # because we want to add some randomness to fsync commands WRITE_STRESS_COMMANDS="[ { - 'name':'Rocksdb Write Stress Test', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Write Stress Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB write stress tests', - 'shell':'make write_stress && python tools/write_stress_runner.py --runtime_sec=3600 --db=/tmp/rocksdb_write_stress || $CONTRUN_NAME=write_stress $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB write stress tests\", + \"shell\":\"cd $WORKING_DIR; make write_stress && /usr/bin/env python3 tools/write_stress_runner.py --runtime_sec=3600 --db=/tmp/rocksdb_write_stress || $CONTRUN_NAME=write_stress $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER } ], - 'artifacts': [{'name': 'database', 'paths': ['/tmp/rocksdb_write_stress']}], + \"artifacts\": [{\"name\": \"database\", \"paths\": [\"/tmp/rocksdb_write_stress\"]}] $REPORT } ]" @@ -509,18 +589,18 @@ # ASAN_TEST_COMMANDS="[ { - 'name':'Rocksdb Unit Test under ASAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Unit Test under ASAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Test RocksDB debug under ASAN', -'shell':'set -o pipefail && ($SHM $ASAN $DEBUG make $PARALLELISM asan_check || $CONTRUN_NAME=asan_check $TASK_CREATION_TOOL) |& /usr/facebook/ops/scripts/asan_symbolize.py -d', - 'user':'root', + \"name\":\"Test RocksDB debug under ASAN\", +\"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $ASAN $DEBUG $SKIP_FORMAT_CHECKS make $PARALLELISM asan_check || $CONTRUN_NAME=asan_check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER } - ], + ] $REPORT } ]" @@ -530,21 +610,69 @@ # ASAN_CRASH_TEST_COMMANDS="[ { - 'name':'Rocksdb crash test under ASAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb crash test under ASAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug asan_crash_test', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test || $CONTRUN_NAME=asan_crash_test $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug asan_crash_test\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $ASAN_CRASH $NON_TSAN_CRASH $SKIP_FORMAT_CHECKS make $PARALLELISM asan_crash_test || $CONTRUN_NAME=asan_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] + $REPORT + } +]" + +# +# RocksDB blackbox crash testing under address sanitizer +# +ASAN_BLACKBOX_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb blackbox crash test under ASAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Build and run RocksDB debug blackbox asan_crash_test\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $ASAN_CRASH $NON_TSAN_CRASH $SKIP_FORMAT_CHECKS make $PARALLELISM blackbox_asan_crash_test || $CONTRUN_NAME=blackbox_asan_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] + $REPORT + } +]" + +# +# RocksDB whitebox crash testing under address sanitizer +# 
+ASAN_WHITEBOX_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb whitebox crash test under ASAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Build and run RocksDB debug whitebox asan_crash_test\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $ASAN_CRASH $NON_TSAN_CRASH $SKIP_FORMAT_CHECKS make $PARALLELISM whitebox_asan_crash_test || $CONTRUN_NAME=whitebox_asan_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -554,21 +682,21 @@ # ASAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { - 'name':'Rocksdb crash test with atomic flush under ASAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb crash test with atomic flush under ASAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug asan_crash_test_with_atomic_flush', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test_with_atomic_flush || $CONTRUN_NAME=asan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug asan_crash_test_with_atomic_flush\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $ASAN_CRASH $NON_TSAN_CRASH $SKIP_FORMAT_CHECKS make $PARALLELISM asan_crash_test_with_atomic_flush || $CONTRUN_NAME=asan_crash_test_with_atomic_flush $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -578,21 +706,21 @@ # ASAN_CRASH_TEST_WITH_TXN_COMMANDS="[ { - 'name':'Rocksdb crash test with txn under ASAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb crash test with txn under ASAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug asan_crash_test_with_txn', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH make J=1 asan_crash_test_with_txn || $CONTRUN_NAME=asan_crash_test_with_txn $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug asan_crash_test_with_txn\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $ASAN_CRASH $NON_TSAN_CRASH $SKIP_FORMAT_CHECKS make $PARALLELISM asan_crash_test_with_txn || $CONTRUN_NAME=asan_crash_test_with_txn $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -602,42 +730,90 @@ # UBSAN_TEST_COMMANDS="[ { - 'name':'Rocksdb Unit Test under UBSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Unit Test under UBSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Test RocksDB debug under UBSAN', - 'shell':'set -o pipefail && $SHM $UBSAN $CLANG $DEBUG make $PARALLELISM ubsan_check || $CONTRUN_NAME=ubsan_check $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Test RocksDB debug under UBSAN\", + \"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $UBSAN $CLANG $DEBUG $SKIP_FORMAT_CHECKS make $PARALLELISM ubsan_check || $CONTRUN_NAME=ubsan_check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER } - ], + ] $REPORT } ]" # -# RocksDB crash testing under udnefined behavior sanitizer +# RocksDB crash testing under undefined behavior sanitizer # UBSAN_CRASH_TEST_COMMANDS="[ { - 'name':'Rocksdb crash test under 
UBSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb crash test under UBSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug ubsan_crash_test', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH $CLANG make J=1 ubsan_crash_test || $CONTRUN_NAME=ubsan_crash_test $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug ubsan_crash_test\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH $CLANG $SKIP_FORMAT_CHECKS make $PARALLELISM ubsan_crash_test || $CONTRUN_NAME=ubsan_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] + $REPORT + } +]" + +# +# RocksDB crash testing under undefined behavior sanitizer +# +UBSAN_BLACKBOX_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb blackbox crash test under UBSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Build and run RocksDB debug blackbox ubsan_crash_test\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH $CLANG $SKIP_FORMAT_CHECKS make $PARALLELISM blackbox_ubsan_crash_test || $CONTRUN_NAME=blackbox_ubsan_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] + $REPORT + } +]" + +# +# RocksDB crash testing under undefined behavior sanitizer +# +UBSAN_WHITEBOX_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb whitebox crash test under UBSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Build and run RocksDB debug whitebox ubsan_crash_test\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH $CLANG $SKIP_FORMAT_CHECKS make $PARALLELISM whitebox_ubsan_crash_test || $CONTRUN_NAME=whitebox_ubsan_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -647,21 +823,21 @@ # UBSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { - 'name':'Rocksdb crash test with atomic flush under UBSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb crash test with atomic flush under UBSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and run RocksDB debug ubsan_crash_test_with_atomic_flush', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH $CLANG make J=1 ubsan_crash_test_with_atomic_flush || $CONTRUN_NAME=ubsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug ubsan_crash_test_with_atomic_flush\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH $CLANG $SKIP_FORMAT_CHECKS make $PARALLELISM ubsan_crash_test_with_atomic_flush || $CONTRUN_NAME=ubsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -671,21 +847,21 @@ # UBSAN_CRASH_TEST_WITH_TXN_COMMANDS="[ { - 'name':'Rocksdb crash test with txn under UBSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb crash test with txn under UBSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build and 
run RocksDB debug ubsan_crash_test_with_txn', - 'timeout': 86400, - 'shell':'$SHM $DEBUG $NON_TSAN_CRASH $CLANG make J=1 ubsan_crash_test_with_txn || $CONTRUN_NAME=ubsan_crash_test_with_txn $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build and run RocksDB debug ubsan_crash_test_with_txn\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG $NON_TSAN_CRASH $CLANG $SKIP_FORMAT_CHECKS make $PARALLELISM ubsan_crash_test_with_txn || $CONTRUN_NAME=ubsan_crash_test_with_txn $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -695,20 +871,20 @@ # VALGRIND_TEST_COMMANDS="[ { - 'name':'Rocksdb Unit Test under valgrind', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Unit Test under valgrind\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Run RocksDB debug unit tests', - 'timeout': 86400, - 'shell':'$SHM $DEBUG make $PARALLELISM valgrind_test || $CONTRUN_NAME=valgrind_check $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Run RocksDB debug unit tests\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; $SHM $DEBUG make $PARALLELISM valgrind_test || $CONTRUN_NAME=valgrind_check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -718,20 +894,20 @@ # TSAN_UNIT_TEST_COMMANDS="[ { - 'name':'Rocksdb Unit Test under TSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Unit Test under TSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Run RocksDB debug unit test', - 'timeout': 86400, - 'shell':'set -o pipefail && $SHM $DEBUG $TSAN make $PARALLELISM check || $CONTRUN_NAME=tsan_check $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Run RocksDB debug unit test\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $DEBUG $TSAN $SKIP_FORMAT_CHECKS make $PARALLELISM check || $CONTRUN_NAME=tsan_check $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -741,21 +917,69 @@ # TSAN_CRASH_TEST_COMMANDS="[ { - 'name':'Rocksdb Crash Test under TSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Crash Test under TSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Compile and run', - 'timeout': 86400, - 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test || $CONTRUN_NAME=tsan_crash_test $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Compile and run\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make $PARALLELISM crash_test || $CONTRUN_NAME=tsan_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] + $REPORT + } +]" + +# +# RocksDB blackbox crash test under TSAN +# +TSAN_BLACKBOX_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb Blackbox Crash Test under TSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Compile and run\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make $PARALLELISM blackbox_crash_test 
|| $CONTRUN_NAME=tsan_blackbox_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] + $REPORT + } +]" + +# +# RocksDB whitebox crash test under TSAN +# +TSAN_WHITEBOX_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb Whitebox Crash Test under TSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + $CLEANUP_ENV, + { + \"name\":\"Compile and run\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make $PARALLELISM whitebox_crash_test || $CONTRUN_NAME=tsan_whitebox_crash_test $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -765,21 +989,21 @@ # TSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS="[ { - 'name':'Rocksdb Crash Test with atomic flush under TSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Crash Test with atomic flush under TSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Compile and run', - 'timeout': 86400, - 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test_with_atomic_flush || $CONTRUN_NAME=tsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Compile and run\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make $PARALLELISM crash_test_with_atomic_flush || $CONTRUN_NAME=tsan_crash_test_with_atomic_flush $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -789,21 +1013,21 @@ # TSAN_CRASH_TEST_WITH_TXN_COMMANDS="[ { - 'name':'Rocksdb Crash Test with txn under TSAN', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'timeout': 86400, - 'steps': [ + \"name\":\"Rocksdb Crash Test with txn under TSAN\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ $CLEANUP_ENV, { - 'name':'Compile and run', - 'timeout': 86400, - 'shell':'set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make J=1 crash_test_with_txn || $CONTRUN_NAME=tsan_crash_test_with_txn $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Compile and run\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; set -o pipefail && $SHM $DEBUG $TSAN $TSAN_CRASH CRASH_TEST_KILL_ODD=1887 make $PARALLELISM crash_test_with_txn || $CONTRUN_NAME=tsan_crash_test_with_txn $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - $UPLOAD_DB_DIR, - ], + $UPLOAD_DB_DIR + ] $REPORT } ]" @@ -818,23 +1042,25 @@ rm -rf /dev/shm/rocksdb mkdir /dev/shm/rocksdb + export https_proxy="fwdproxy:8080" + tools/check_format_compatible.sh } FORMAT_COMPATIBLE_COMMANDS="[ { - 'name':'Rocksdb Format Compatible tests', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Format Compatible tests\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Run RocksDB debug unit test', - 'shell':'build_tools/rocksdb-lego-determinator run_format_compatible || $CONTRUN_NAME=run_format_compatible $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Run RocksDB debug unit test\", + \"shell\":\"cd $WORKING_DIR; build_tools/rocksdb-lego-determinator run_format_compatible || $CONTRUN_NAME=run_format_compatible $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] 
$REPORT } ]" @@ -852,23 +1078,24 @@ mv .tmp.fbcode_config.sh build_tools/fbcode_config.sh cat Makefile | grep -v tools/ldb_test.py > .tmp.Makefile mv .tmp.Makefile Makefile - make $DEBUG J=1 check + export $SKIP_FORMAT_CHECKS + make $DEBUG $PARALLELISM check } NO_COMPRESSION_COMMANDS="[ { - 'name':'Rocksdb No Compression tests', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb No Compression tests\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Run RocksDB debug unit test', - 'shell':'build_tools/rocksdb-lego-determinator run_no_compression || $CONTRUN_NAME=run_no_compression $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Run RocksDB debug unit test\", + \"shell\":\"cd $WORKING_DIR; build_tools/rocksdb-lego-determinator run_no_compression || $CONTRUN_NAME=run_no_compression $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -878,7 +1105,7 @@ # run_regression() { - time -v bash -vx ./build_tools/regression_build_test.sh $(mktemp -d $WORKSPACE/leveldb.XXXX) $(mktemp leveldb_test_stats.XXXX) + time bash -vx ./build_tools/regression_build_test.sh $(mktemp -d $WORKING_DIR/rocksdb.XXXX) $(mktemp rocksdb_test_stats.XXXX) # ======= report size to ODS ======== @@ -895,6 +1122,7 @@ strip librocksdb.a send_size_to_ods static_lib_stripped $(stat --printf="%s" librocksdb.a) + make clean make -j$(nproc) shared_lib send_size_to_ods shared_lib $(stat --printf="%s" `readlink -f librocksdb.so`) strip `readlink -f librocksdb.so` @@ -907,6 +1135,7 @@ strip librocksdb.a send_size_to_ods static_lib_lite_stripped $(stat --printf="%s" librocksdb.a) + make clean make LITE=1 -j$(nproc) shared_lib send_size_to_ods shared_lib_lite $(stat --printf="%s" `readlink -f librocksdb.so`) strip `readlink -f librocksdb.so` @@ -915,17 +1144,18 @@ REGRESSION_COMMANDS="[ { - 'name':'Rocksdb regression commands', - 'oncall':'$ONCALL', - 'steps': [ + \"name\":\"Rocksdb regression commands\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Make and run script', - 'shell':'build_tools/rocksdb-lego-determinator run_regression || $CONTRUN_NAME=run_regression $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Make and run script\", + \"shell\":\"cd $WORKING_DIR; build_tools/rocksdb-lego-determinator run_regression || $CONTRUN_NAME=run_regression $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER - }, - ], + } + ] $REPORT } ]" @@ -935,18 +1165,52 @@ # JAVA_BUILD_TEST_COMMANDS="[ { - 'name':'Rocksdb Java Build', - 'oncall':'$ONCALL', - 'executeLocal': 'true', - 'steps': [ + \"name\":\"Rocksdb Java Build\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"steps\": [ $CLEANUP_ENV, { - 'name':'Build RocksDB for Java', - 'shell':'$SETUP_JAVA_ENV; $SHM make rocksdbjava || $CONTRUN_NAME=rocksdbjava $TASK_CREATION_TOOL', - 'user':'root', + \"name\":\"Build RocksDB for Java\", + \"shell\":\"cd $WORKING_DIR; $SETUP_JAVA_ENV; $SHM make $PARALLELISM rocksdbjava || $CONTRUN_NAME=rocksdbjava $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + } + ] + $REPORT + } +]" + +# +# RocksDB fbcode stress/crash test +# +FBCODE_STRESS_CRASH_TEST_COMMANDS="[ + { + \"name\":\"Rocksdb Fbcode Stress and Crash Test\", + \"oncall\":\"$ONCALL\", + \"executeLocal\": \"true\", + \"timeout\": 86400, + \"steps\": [ + { + \"name\":\"Copy RocksDB code to fbcode repo\", + \"shell\":\"cd internal_repo_rocksdb/repo && git init -b main && git add * && git commit -a -m \\\"Make 
internal_repo_rocksdb/repo a git repo\\\" && cd ../.. && echo Y | python3 rocks/release_script/release_to_fbcode.py -u internal_repo_rocksdb/repo main || $CONTRUN_NAME=db_stress_fbcode $TASK_CREATION_TOOL\", + \"user\":\"root\", $PARSER }, - ], + { + \"name\":\"Build RocksDB fbcode stress tests\", + \"shell\":\"cd $WORKING_DIR; buck build @mode/dbg rocks/tools:rocks_db_stress || $CONTRUN_NAME=db_stress_fbcode $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + }, + { + \"name\":\"Run RocksDB whitebox crash tests\", + \"timeout\": 86400, + \"shell\":\"cd $WORKING_DIR; mkdir /dev/shm/rocksdb_fbcode_crash_test && TEST_TMPDIR=\$(mktemp -d --tmpdir=/dev/shm/rocksdb_fbcode_crash_test) python3 rocksdb/src/tools/db_crashtest.py --stress_cmd=buck-out/dbg/gen/rocks/tools/rocks_db_stress -secondary_cache_uri=\\\"$SECONDARY_CACHE_URI\\\" --env_uri=$ENV_URI $EXTRA_DB_STRESS_ARGS -logtostderr=false $TEST_TYPE || $CONTRUN_NAME=db_stress_fbcode $TASK_CREATION_TOOL\", + \"user\":\"root\", + $PARSER + } + ] $REPORT } ]" @@ -986,18 +1250,24 @@ lite) echo $LITE_BUILD_COMMANDS ;; - report_lite_binary_size) - echo $REPORT_LITE_BINARY_SIZE_COMMANDS - ;; stress_crash) echo $STRESS_CRASH_TEST_COMMANDS ;; + blackbox_stress_crash) + echo $BLACKBOX_STRESS_CRASH_TEST_COMMANDS + ;; + whitebox_stress_crash) + echo $WHITEBOX_STRESS_CRASH_TEST_COMMANDS + ;; stress_crash_with_atomic_flush) echo $STRESS_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; stress_crash_with_txn) echo $STRESS_CRASH_TEST_WITH_TXN_COMMANDS ;; + stress_crash_with_ts) + echo $STRESS_CRASH_TEST_WITH_TS_COMMANDS + ;; write_stress) echo $WRITE_STRESS_COMMANDS ;; @@ -1007,6 +1277,12 @@ asan_crash) echo $ASAN_CRASH_TEST_COMMANDS ;; + blackbox_asan_crash) + echo $ASAN_BLACKBOX_CRASH_TEST_COMMANDS + ;; + whitebox_asan_crash) + echo $ASAN_WHITEBOX_CRASH_TEST_COMMANDS + ;; asan_crash_with_atomic_flush) echo $ASAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; @@ -1019,6 +1295,12 @@ ubsan_crash) echo $UBSAN_CRASH_TEST_COMMANDS ;; + blackbox_ubsan_crash) + echo $UBSAN_BLACKBOX_CRASH_TEST_COMMANDS + ;; + whitebox_ubsan_crash) + echo $UBSAN_WHITEBOX_CRASH_TEST_COMMANDS + ;; ubsan_crash_with_atomic_flush) echo $UBSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; @@ -1034,6 +1316,12 @@ tsan_crash) echo $TSAN_CRASH_TEST_COMMANDS ;; + blackbox_tsan_crash) + echo $TSAN_BLACKBOX_CRASH_TEST_COMMANDS + ;; + whitebox_tsan_crash) + echo $TSAN_WHITEBOX_CRASH_TEST_COMMANDS + ;; tsan_crash_with_atomic_flush) echo $TSAN_CRASH_TEST_WITH_ATOMIC_FLUSH_COMMANDS ;; @@ -1056,11 +1344,18 @@ echo $REGRESSION_COMMANDS ;; run_regression) + set -e run_regression + set +e ;; java_build) echo $JAVA_BUILD_TEST_COMMANDS ;; + fbcode_stress_crash) + set -f + echo $FBCODE_STRESS_CRASH_TEST_COMMANDS + set +f + ;; *) echo "Invalid determinator command" exit 1 diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/run_ci_db_test.ps1 mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/run_ci_db_test.ps1 --- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/run_ci_db_test.ps1 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/run_ci_db_test.ps1 2025-05-19 16:14:27.000000000 +0000 @@ -68,7 +68,7 @@ if($WorkFolder -eq "") { - # If TEST_TMPDIR is set use it + # If TEST_TMPDIR is set use it [string]$var = $Env:TEST_TMPDIR if($var -eq "") { $WorkFolder = -Join($RootFolder, "\db_tests\") @@ -93,7 +93,7 @@ if($ExcludeCases -ne "") { Write-Host "ExcludeCases: $ExcludeCases" $l = $ExcludeCases -split ' ' - ForEach($t in $l) { + ForEach($t in $l) { 
$ExcludeCasesSet.Add($t) | Out-Null } } @@ -102,7 +102,7 @@ if($ExcludeExes -ne "") { Write-Host "ExcludeExe: $ExcludeExes" $l = $ExcludeExes -split ' ' - ForEach($t in $l) { + ForEach($t in $l) { $ExcludeExesSet.Add($t) | Out-Null } } @@ -118,6 +118,10 @@ # MultiThreaded/MultiThreadedDBTest. # MultiThreaded/0 # GetParam() = 0 # MultiThreaded/1 # GetParam() = 1 +# RibbonTypeParamTest/0. # TypeParam = struct DefaultTypesAndSettings +# CompactnessAndBacktrackAndFpRate +# Extremes +# FindOccupancyForSuccessRate # # into this: # @@ -125,6 +129,9 @@ # DBTest.WriteEmptyBatch # MultiThreaded/MultiThreadedDBTest.MultiThreaded/0 # MultiThreaded/MultiThreadedDBTest.MultiThreaded/1 +# RibbonTypeParamTest/0.CompactnessAndBacktrackAndFpRate +# RibbonTypeParamTest/0.Extremes +# RibbonTypeParamTest/0.FindOccupancyForSuccessRate # # Output into the parameter in a form TestName -> Log File Name function ExtractTestCases([string]$GTestExe, $HashTable) { @@ -138,6 +145,8 @@ ForEach( $l in $Tests) { + # remove trailing comment if any + $l = $l -replace '\s+\#.*','' # Leading whitespace is fine $l = $l -replace '^\s+','' # Trailing dot is a test group but no whitespace @@ -146,8 +155,7 @@ } else { # Otherwise it is a test name, remove leading space $test = $l - # remove trailing comment if any and create a log name - $test = $test -replace '\s+\#.*','' + # create a log name $test = "$Group$test" if($ExcludeCasesSet.Contains($test)) { @@ -253,7 +261,7 @@ $DiscoveredExe = @() dir -Path $search_path | ForEach-Object { - $DiscoveredExe += ($_.Name) + $DiscoveredExe += ($_.Name) } # Remove exclusions @@ -293,7 +301,7 @@ $ListOfExe = @() dir -Path $search_path | ForEach-Object { - $ListOfExe += ($_.Name) + $ListOfExe += ($_.Name) } # Exclude those in RunOnly from running as suites @@ -348,7 +356,7 @@ # Wait for all to finish and get the results while(($JobToLog.Count -gt 0) -or - ($TestCmds.Count -gt 0) -or + ($TestCmds.Count -gt 0) -or ($Suites.Count -gt 0)) { # Make sure we have maximum concurrent jobs running if anything @@ -468,8 +476,8 @@ $EndDate = (Get-Date) -New-TimeSpan -Start $StartDate -End $EndDate | - ForEach-Object { +New-TimeSpan -Start $StartDate -End $EndDate | + ForEach-Object { "Elapsed time: {0:g}" -f $_ } @@ -484,4 +492,4 @@ exit 0 - + diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/setup_centos7.sh mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/setup_centos7.sh --- mariadb-10.11.11/storage/rocksdb/rocksdb/build_tools/setup_centos7.sh 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/build_tools/setup_centos7.sh 2025-05-19 16:14:27.000000000 +0000 @@ -1,9 +1,9 @@ #!/bin/bash # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-set -e +set -ex -ROCKSDB_VERSION="5.10.3" -ZSTD_VERSION="1.1.3" +ROCKSDB_VERSION="6.7.3" +ZSTD_VERSION="1.4.4" echo "This script configures CentOS with everything needed to build and run RocksDB" @@ -40,5 +40,6 @@ chown -R vagrant:vagrant /usr/local/rocksdb/ sudo -u vagrant make static_lib cd examples/ -sudo -u vagrant make all -sudo -u vagrant ./c_simple_example +sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ make all +sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ ./c_simple_example + diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,72 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "rocksdb/cache.h" + +#include "cache/lru_cache.h" +#include "rocksdb/secondary_cache.h" +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +#ifndef ROCKSDB_LITE +static std::unordered_map + lru_cache_options_type_info = { + {"capacity", + {offsetof(struct LRUCacheOptions, capacity), OptionType::kSizeT, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"num_shard_bits", + {offsetof(struct LRUCacheOptions, num_shard_bits), OptionType::kInt, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"strict_capacity_limit", + {offsetof(struct LRUCacheOptions, strict_capacity_limit), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + {"high_pri_pool_ratio", + {offsetof(struct LRUCacheOptions, high_pri_pool_ratio), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, +}; +#endif // ROCKSDB_LITE + +Status SecondaryCache::CreateFromString( + const ConfigOptions& config_options, const std::string& value, + std::shared_ptr* result) { + return LoadSharedObject(config_options, value, nullptr, + result); +} + +Status Cache::CreateFromString(const ConfigOptions& config_options, + const std::string& value, + std::shared_ptr* result) { + Status status; + std::shared_ptr cache; + if (value.find('=') == std::string::npos) { + cache = NewLRUCache(ParseSizeT(value)); + } else { +#ifndef ROCKSDB_LITE + LRUCacheOptions cache_opts; + status = OptionTypeInfo::ParseStruct(config_options, "", + &lru_cache_options_type_info, "", + value, &cache_opts); + if (status.ok()) { + cache = NewLRUCache(cache_opts); + } +#else + (void)config_options; + status = Status::NotSupported("Cannot load cache in LITE mode ", value); +#endif //! 
ROCKSDB_LITE + } + if (status.ok()) { + result->swap(cache); + } + return status; +} +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_bench.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_bench.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_bench.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_bench.cc 2025-05-19 16:14:27.000000000 +0000 @@ -1,8 +1,11 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// Copyright (c) 2013-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). - +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. #ifndef GFLAGS #include int main() { @@ -10,272 +13,8 @@ return 1; } #else - -#include -#include -#include - -#include "port/port.h" -#include "rocksdb/cache.h" -#include "rocksdb/db.h" -#include "rocksdb/env.h" -#include "util/gflags_compat.h" -#include "util/mutexlock.h" -#include "util/random.h" - -using GFLAGS_NAMESPACE::ParseCommandLineFlags; - -static const uint32_t KB = 1024; - -DEFINE_int32(threads, 16, "Number of concurrent threads to run."); -DEFINE_int64(cache_size, 8 * KB * KB, - "Number of bytes to use as a cache of uncompressed data."); -DEFINE_int32(num_shard_bits, 4, "shard_bits."); - -DEFINE_int64(max_key, 1 * KB * KB * KB, "Max number of key to place in cache"); -DEFINE_uint64(ops_per_thread, 1200000, "Number of operations per thread."); - -DEFINE_bool(populate_cache, false, "Populate cache before operations"); -DEFINE_int32(insert_percent, 40, - "Ratio of insert to total workload (expressed as a percentage)"); -DEFINE_int32(lookup_percent, 50, - "Ratio of lookup to total workload (expressed as a percentage)"); -DEFINE_int32(erase_percent, 10, - "Ratio of erase to total workload (expressed as a percentage)"); - -DEFINE_bool(use_clock_cache, false, ""); - -namespace ROCKSDB_NAMESPACE { - -class CacheBench; -namespace { -void deleter(const Slice& /*key*/, void* value) { - delete reinterpret_cast(value); -} - -// State shared by all concurrent executions of the same benchmark. -class SharedState { - public: - explicit SharedState(CacheBench* cache_bench) - : cv_(&mu_), - num_threads_(FLAGS_threads), - num_initialized_(0), - start_(false), - num_done_(0), - cache_bench_(cache_bench) { - } - - ~SharedState() {} - - port::Mutex* GetMutex() { - return &mu_; - } - - port::CondVar* GetCondVar() { - return &cv_; - } - - CacheBench* GetCacheBench() const { - return cache_bench_; - } - - void IncInitialized() { - num_initialized_++; - } - - void IncDone() { - num_done_++; - } - - bool AllInitialized() const { - return num_initialized_ >= num_threads_; - } - - bool AllDone() const { - return num_done_ >= num_threads_; - } - - void SetStart() { - start_ = true; - } - - bool Started() const { - return start_; - } - - private: - port::Mutex mu_; - port::CondVar cv_; - - const uint64_t num_threads_; - uint64_t num_initialized_; - bool start_; - uint64_t num_done_; - - CacheBench* cache_bench_; -}; - -// Per-thread state for concurrent executions of the same benchmark. 
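For context on the new cache/cache.cc added above: Cache::CreateFromString() accepts either a bare size or a name=value option string parsed into LRUCacheOptions (the latter in non-LITE builds only). A minimal usage sketch against the public headers; illustration only, not part of the patch:

#include <cassert>
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"  // ConfigOptions

using namespace ROCKSDB_NAMESPACE;

int main() {
  ConfigOptions config_options;
  std::shared_ptr<Cache> cache;

  // A bare number (k/M/G suffixes allowed) goes through ParseSizeT()
  // and NewLRUCache(capacity).
  Status s = Cache::CreateFromString(config_options, "16M", &cache);
  assert(s.ok() && cache != nullptr);

  // A name=value list is parsed into LRUCacheOptions via the option type
  // map registered in cache.cc above.
  s = Cache::CreateFromString(config_options,
                              "capacity=16777216;num_shard_bits=4", &cache);
  assert(s.ok() && cache->GetCapacity() == 16777216);
  return 0;
}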
-struct ThreadState { - uint32_t tid; - Random rnd; - SharedState* shared; - - ThreadState(uint32_t index, SharedState* _shared) - : tid(index), rnd(1000 + index), shared(_shared) {} -}; -} // namespace - -class CacheBench { - public: - CacheBench() : num_threads_(FLAGS_threads) { - if (FLAGS_use_clock_cache) { - cache_ = NewClockCache(FLAGS_cache_size, FLAGS_num_shard_bits); - if (!cache_) { - fprintf(stderr, "Clock cache not supported.\n"); - exit(1); - } - } else { - cache_ = NewLRUCache(FLAGS_cache_size, FLAGS_num_shard_bits); - } - } - - ~CacheBench() {} - - void PopulateCache() { - Random rnd(1); - for (int64_t i = 0; i < FLAGS_cache_size; i++) { - uint64_t rand_key = rnd.Next() % FLAGS_max_key; - // Cast uint64* to be char*, data would be copied to cache - Slice key(reinterpret_cast(&rand_key), 8); - // do insert - cache_->Insert(key, new char[10], 1, &deleter); - } - } - - bool Run() { - ROCKSDB_NAMESPACE::Env* env = ROCKSDB_NAMESPACE::Env::Default(); - - PrintEnv(); - SharedState shared(this); - std::vector threads(num_threads_); - for (uint32_t i = 0; i < num_threads_; i++) { - threads[i] = new ThreadState(i, &shared); - env->StartThread(ThreadBody, threads[i]); - } - { - MutexLock l(shared.GetMutex()); - while (!shared.AllInitialized()) { - shared.GetCondVar()->Wait(); - } - // Record start time - uint64_t start_time = env->NowMicros(); - - // Start all threads - shared.SetStart(); - shared.GetCondVar()->SignalAll(); - - // Wait threads to complete - while (!shared.AllDone()) { - shared.GetCondVar()->Wait(); - } - - // Record end time - uint64_t end_time = env->NowMicros(); - double elapsed = static_cast(end_time - start_time) * 1e-6; - uint32_t qps = static_cast( - static_cast(FLAGS_threads * FLAGS_ops_per_thread) / elapsed); - fprintf(stdout, "Complete in %.3f s; QPS = %u\n", elapsed, qps); - } - return true; - } - - private: - std::shared_ptr cache_; - uint32_t num_threads_; - - static void ThreadBody(void* v) { - ThreadState* thread = reinterpret_cast(v); - SharedState* shared = thread->shared; - - { - MutexLock l(shared->GetMutex()); - shared->IncInitialized(); - if (shared->AllInitialized()) { - shared->GetCondVar()->SignalAll(); - } - while (!shared->Started()) { - shared->GetCondVar()->Wait(); - } - } - thread->shared->GetCacheBench()->OperateCache(thread); - - { - MutexLock l(shared->GetMutex()); - shared->IncDone(); - if (shared->AllDone()) { - shared->GetCondVar()->SignalAll(); - } - } - } - - void OperateCache(ThreadState* thread) { - for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) { - uint64_t rand_key = thread->rnd.Next() % FLAGS_max_key; - // Cast uint64* to be char*, data would be copied to cache - Slice key(reinterpret_cast(&rand_key), 8); - int32_t prob_op = thread->rnd.Uniform(100); - if (prob_op >= 0 && prob_op < FLAGS_insert_percent) { - // do insert - cache_->Insert(key, new char[10], 1, &deleter); - } else if (prob_op -= FLAGS_insert_percent && - prob_op < FLAGS_lookup_percent) { - // do lookup - auto handle = cache_->Lookup(key); - if (handle) { - cache_->Release(handle); - } - } else if (prob_op -= FLAGS_lookup_percent && - prob_op < FLAGS_erase_percent) { - // do erase - cache_->Erase(key); - } - } - } - - void PrintEnv() const { - printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion); - printf("Number of threads : %d\n", FLAGS_threads); - printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread); - printf("Cache size : %" PRIu64 "\n", FLAGS_cache_size); - printf("Num shard bits : %d\n", FLAGS_num_shard_bits); - printf("Max key : %" 
PRIu64 "\n", FLAGS_max_key); - printf("Populate cache : %d\n", FLAGS_populate_cache); - printf("Insert percentage : %d%%\n", FLAGS_insert_percent); - printf("Lookup percentage : %d%%\n", FLAGS_lookup_percent); - printf("Erase percentage : %d%%\n", FLAGS_erase_percent); - printf("----------------------------\n"); - } -}; -} // namespace ROCKSDB_NAMESPACE - +#include "rocksdb/cache_bench_tool.h" int main(int argc, char** argv) { - ParseCommandLineFlags(&argc, &argv, true); - - if (FLAGS_threads <= 0) { - fprintf(stderr, "threads number <= 0\n"); - exit(1); - } - - ROCKSDB_NAMESPACE::CacheBench bench; - if (FLAGS_populate_cache) { - bench.PopulateCache(); - } - if (bench.Run()) { - return 0; - } else { - return 1; - } + return ROCKSDB_NAMESPACE::cache_bench_tool(argc, argv); } - #endif // GFLAGS diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_bench_tool.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_bench_tool.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_bench_tool.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_bench_tool.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,794 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifdef GFLAGS +#include +#include +#include +#include +#include +#include +#include + +#include "db/db_impl/db_impl.h" +#include "monitoring/histogram.h" +#include "port/port.h" +#include "rocksdb/cache.h" +#include "rocksdb/convenience.h" +#include "rocksdb/db.h" +#include "rocksdb/env.h" +#include "rocksdb/secondary_cache.h" +#include "rocksdb/system_clock.h" +#include "rocksdb/table_properties.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/cachable_entry.h" +#include "util/coding.h" +#include "util/gflags_compat.h" +#include "util/hash.h" +#include "util/mutexlock.h" +#include "util/random.h" +#include "util/stop_watch.h" +#include "util/string_util.h" + +using GFLAGS_NAMESPACE::ParseCommandLineFlags; + +static constexpr uint32_t KiB = uint32_t{1} << 10; +static constexpr uint32_t MiB = KiB << 10; +static constexpr uint64_t GiB = MiB << 10; + +DEFINE_uint32(threads, 16, "Number of concurrent threads to run."); +DEFINE_uint64(cache_size, 1 * GiB, + "Number of bytes to use as a cache of uncompressed data."); +DEFINE_uint32(num_shard_bits, 6, "shard_bits."); + +DEFINE_double(resident_ratio, 0.25, + "Ratio of keys fitting in cache to keyspace."); +DEFINE_uint64(ops_per_thread, 2000000U, "Number of operations per thread."); +DEFINE_uint32(value_bytes, 8 * KiB, "Size of each value added."); + +DEFINE_uint32(skew, 5, "Degree of skew in key selection"); +DEFINE_bool(populate_cache, true, "Populate cache before operations"); + +DEFINE_uint32(lookup_insert_percent, 87, + "Ratio of lookup (+ insert on not found) to total workload " + "(expressed as a percentage)"); +DEFINE_uint32(insert_percent, 2, + "Ratio of insert to total workload (expressed as a percentage)"); +DEFINE_uint32(lookup_percent, 10, + "Ratio of lookup to total workload (expressed as a percentage)"); +DEFINE_uint32(erase_percent, 1, + "Ratio of erase to total workload (expressed as a percentage)"); +DEFINE_bool(gather_stats, false, + "Whether to periodically simulate gathering block cache stats, " + "using one more thread."); +DEFINE_uint32( + gather_stats_sleep_ms, 1000, + 
"How many milliseconds to sleep between each gathering of stats."); + +DEFINE_uint32(gather_stats_entries_per_lock, 256, + "For Cache::ApplyToAllEntries"); +DEFINE_bool(skewed, false, "If true, skew the key access distribution"); +#ifndef ROCKSDB_LITE +DEFINE_string(secondary_cache_uri, "", + "Full URI for creating a custom secondary cache object"); +static class std::shared_ptr secondary_cache; +#endif // ROCKSDB_LITE + +DEFINE_bool(use_clock_cache, false, ""); + +// ## BEGIN stress_cache_key sub-tool options ## +DEFINE_bool(stress_cache_key, false, + "If true, run cache key stress test instead"); +DEFINE_uint32(sck_files_per_day, 2500000, + "(-stress_cache_key) Simulated files generated per day"); +DEFINE_uint32(sck_duration, 90, + "(-stress_cache_key) Number of days to simulate in each run"); +DEFINE_uint32( + sck_min_collision, 15, + "(-stress_cache_key) Keep running until this many collisions seen"); +DEFINE_uint32( + sck_file_size_mb, 32, + "(-stress_cache_key) Simulated file size in MiB, for accounting purposes"); +DEFINE_uint32(sck_reopen_nfiles, 100, + "(-stress_cache_key) Re-opens DB average every n files"); +DEFINE_uint32( + sck_restarts_per_day, 24, + "(-stress_cache_key) Simulated process restarts per day (across DBs)"); +DEFINE_uint32(sck_db_count, 100, + "(-stress_cache_key) Parallel DBs in operation"); +DEFINE_uint32(sck_table_bits, 20, + "(-stress_cache_key) Log2 number of tracked files"); +DEFINE_uint32(sck_keep_bits, 50, + "(-stress_cache_key) Number of cache key bits to keep"); +DEFINE_bool(sck_randomize, false, + "(-stress_cache_key) Randomize (hash) cache key"); +DEFINE_bool(sck_footer_unique_id, false, + "(-stress_cache_key) Simulate using proposed footer unique id"); +// ## END stress_cache_key sub-tool options ## + +namespace ROCKSDB_NAMESPACE { + +class CacheBench; +namespace { +// State shared by all concurrent executions of the same benchmark. +class SharedState { + public: + explicit SharedState(CacheBench* cache_bench) + : cv_(&mu_), + num_initialized_(0), + start_(false), + num_done_(0), + cache_bench_(cache_bench) {} + + ~SharedState() {} + + port::Mutex* GetMutex() { return &mu_; } + + port::CondVar* GetCondVar() { return &cv_; } + + CacheBench* GetCacheBench() const { return cache_bench_; } + + void IncInitialized() { num_initialized_++; } + + void IncDone() { num_done_++; } + + bool AllInitialized() const { return num_initialized_ >= FLAGS_threads; } + + bool AllDone() const { return num_done_ >= FLAGS_threads; } + + void SetStart() { start_ = true; } + + bool Started() const { return start_; } + + private: + port::Mutex mu_; + port::CondVar cv_; + + uint64_t num_initialized_; + bool start_; + uint64_t num_done_; + + CacheBench* cache_bench_; +}; + +// Per-thread state for concurrent executions of the same benchmark. 
+struct ThreadState { + uint32_t tid; + Random64 rnd; + SharedState* shared; + HistogramImpl latency_ns_hist; + uint64_t duration_us = 0; + + ThreadState(uint32_t index, SharedState* _shared) + : tid(index), rnd(1000 + index), shared(_shared) {} +}; + +struct KeyGen { + char key_data[27]; + + Slice GetRand(Random64& rnd, uint64_t max_key, int max_log) { + uint64_t key = 0; + if (!FLAGS_skewed) { + uint64_t raw = rnd.Next(); + // Skew according to setting + for (uint32_t i = 0; i < FLAGS_skew; ++i) { + raw = std::min(raw, rnd.Next()); + } + key = FastRange64(raw, max_key); + } else { + key = rnd.Skewed(max_log); + if (key > max_key) { + key -= max_key; + } + } + // Variable size and alignment + size_t off = key % 8; + key_data[0] = char{42}; + EncodeFixed64(key_data + 1, key); + key_data[9] = char{11}; + EncodeFixed64(key_data + 10, key); + key_data[18] = char{4}; + EncodeFixed64(key_data + 19, key); + return Slice(&key_data[off], sizeof(key_data) - off); + } +}; + +char* createValue(Random64& rnd) { + char* rv = new char[FLAGS_value_bytes]; + // Fill with some filler data, and take some CPU time + for (uint32_t i = 0; i < FLAGS_value_bytes; i += 8) { + EncodeFixed64(rv + i, rnd.Next()); + } + return rv; +} + +// Callbacks for secondary cache +size_t SizeFn(void* /*obj*/) { return FLAGS_value_bytes; } + +Status SaveToFn(void* obj, size_t /*offset*/, size_t size, void* out) { + memcpy(out, obj, size); + return Status::OK(); +} + +// Different deleters to simulate using deleter to gather +// stats on the code origin and kind of cache entries. +void deleter1(const Slice& /*key*/, void* value) { + delete[] static_cast(value); +} +void deleter2(const Slice& /*key*/, void* value) { + delete[] static_cast(value); +} +void deleter3(const Slice& /*key*/, void* value) { + delete[] static_cast(value); +} + +Cache::CacheItemHelper helper1(SizeFn, SaveToFn, deleter1); +Cache::CacheItemHelper helper2(SizeFn, SaveToFn, deleter2); +Cache::CacheItemHelper helper3(SizeFn, SaveToFn, deleter3); +} // namespace + +class CacheBench { + static constexpr uint64_t kHundredthUint64 = + std::numeric_limits::max() / 100U; + + public: + CacheBench() + : max_key_(static_cast(FLAGS_cache_size / FLAGS_resident_ratio / + FLAGS_value_bytes)), + lookup_insert_threshold_(kHundredthUint64 * + FLAGS_lookup_insert_percent), + insert_threshold_(lookup_insert_threshold_ + + kHundredthUint64 * FLAGS_insert_percent), + lookup_threshold_(insert_threshold_ + + kHundredthUint64 * FLAGS_lookup_percent), + erase_threshold_(lookup_threshold_ + + kHundredthUint64 * FLAGS_erase_percent), + skewed_(FLAGS_skewed) { + if (erase_threshold_ != 100U * kHundredthUint64) { + fprintf(stderr, "Percentages must add to 100.\n"); + exit(1); + } + + max_log_ = 0; + if (skewed_) { + uint64_t max_key = max_key_; + while (max_key >>= 1) max_log_++; + if (max_key > (static_cast(1) << max_log_)) max_log_++; + } + + if (FLAGS_use_clock_cache) { + cache_ = NewClockCache(FLAGS_cache_size, FLAGS_num_shard_bits); + if (!cache_) { + fprintf(stderr, "Clock cache not supported.\n"); + exit(1); + } + } else { + LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, false, 0.5); +#ifndef ROCKSDB_LITE + if (!FLAGS_secondary_cache_uri.empty()) { + Status s = SecondaryCache::CreateFromString( + ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache); + if (secondary_cache == nullptr) { + fprintf( + stderr, + "No secondary cache registered matching string: %s status=%s\n", + FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str()); + exit(1); + } + 
opts.secondary_cache = secondary_cache; + } +#endif // ROCKSDB_LITE + + cache_ = NewLRUCache(opts); + } + } + + ~CacheBench() {} + + void PopulateCache() { + Random64 rnd(1); + KeyGen keygen; + for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) { + cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_), createValue(rnd), + &helper1, FLAGS_value_bytes); + } + } + + bool Run() { + const auto clock = SystemClock::Default().get(); + + PrintEnv(); + SharedState shared(this); + std::vector > threads(FLAGS_threads); + for (uint32_t i = 0; i < FLAGS_threads; i++) { + threads[i].reset(new ThreadState(i, &shared)); + std::thread(ThreadBody, threads[i].get()).detach(); + } + + HistogramImpl stats_hist; + std::string stats_report; + std::thread stats_thread(StatsBody, &shared, &stats_hist, &stats_report); + + uint64_t start_time; + { + MutexLock l(shared.GetMutex()); + while (!shared.AllInitialized()) { + shared.GetCondVar()->Wait(); + } + // Record start time + start_time = clock->NowMicros(); + + // Start all threads + shared.SetStart(); + shared.GetCondVar()->SignalAll(); + + // Wait threads to complete + while (!shared.AllDone()) { + shared.GetCondVar()->Wait(); + } + } + + // Stats gathering is considered background work. This time measurement + // is for foreground work, and not really ideal for that. See below. + uint64_t end_time = clock->NowMicros(); + stats_thread.join(); + + // Wall clock time - includes idle time if threads + // finish at different times (not ideal). + double elapsed_secs = static_cast(end_time - start_time) * 1e-6; + uint32_t ops_per_sec = static_cast( + 1.0 * FLAGS_threads * FLAGS_ops_per_thread / elapsed_secs); + printf("Complete in %.3f s; Rough parallel ops/sec = %u\n", elapsed_secs, + ops_per_sec); + + // Total time in each thread (more accurate throughput measure) + elapsed_secs = 0; + for (uint32_t i = 0; i < FLAGS_threads; i++) { + elapsed_secs += threads[i]->duration_us * 1e-6; + } + ops_per_sec = static_cast(1.0 * FLAGS_threads * + FLAGS_ops_per_thread / elapsed_secs); + printf("Thread ops/sec = %u\n", ops_per_sec); + + printf("\nOperation latency (ns):\n"); + HistogramImpl combined; + for (uint32_t i = 0; i < FLAGS_threads; i++) { + combined.Merge(threads[i]->latency_ns_hist); + } + printf("%s", combined.ToString().c_str()); + + if (FLAGS_gather_stats) { + printf("\nGather stats latency (us):\n"); + printf("%s", stats_hist.ToString().c_str()); + } + + printf("\n%s", stats_report.c_str()); + + return true; + } + + private: + std::shared_ptr cache_; + const uint64_t max_key_; + // Cumulative thresholds in the space of a random uint64_t + const uint64_t lookup_insert_threshold_; + const uint64_t insert_threshold_; + const uint64_t lookup_threshold_; + const uint64_t erase_threshold_; + const bool skewed_; + int max_log_; + + // A benchmark version of gathering stats on an active block cache by + // iterating over it. The primary purpose is to measure the impact of + // gathering stats with ApplyToAllEntries on throughput- and + // latency-sensitive Cache users. Performance of stats gathering is + // also reported. The last set of gathered stats is also reported, for + // manual sanity checking for logical errors or other unexpected + // behavior of cache_bench or the underlying Cache. 
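StatsBody, directly below, is built around Cache::ApplyToAllEntries(), the API whose throughput impact this benchmark measures. A standalone sketch of that API (the key, charge, and the value 256 are arbitrary; illustration only, not part of the patch):

#include <cstdio>
#include <memory>

#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  std::shared_ptr<Cache> cache = NewLRUCache(1 << 20);
  cache->Insert("some-key", new char[10], /*charge=*/10,
                [](const Slice& /*key*/, void* v) {
                  delete[] static_cast<char*>(v);
                });

  size_t entries = 0, total_charge = 0;
  Cache::ApplyToAllEntriesOptions opts;
  opts.average_entries_per_lock = 256;  // cf. FLAGS_gather_stats_entries_per_lock
  cache->ApplyToAllEntries(
      [&](const Slice& /*key*/, void* /*value*/, size_t charge,
          Cache::DeleterFn /*deleter*/) {
        ++entries;
        total_charge += charge;
      },
      opts);
  std::printf("%zu entries, %zu bytes charged\n", entries, total_charge);
  return 0;
}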
+ static void StatsBody(SharedState* shared, HistogramImpl* stats_hist, + std::string* stats_report) { + if (!FLAGS_gather_stats) { + return; + } + const auto clock = SystemClock::Default().get(); + uint64_t total_key_size = 0; + uint64_t total_charge = 0; + uint64_t total_entry_count = 0; + std::set deleters; + StopWatchNano timer(clock); + + for (;;) { + uint64_t time; + time = clock->NowMicros(); + uint64_t deadline = time + uint64_t{FLAGS_gather_stats_sleep_ms} * 1000; + + { + MutexLock l(shared->GetMutex()); + for (;;) { + if (shared->AllDone()) { + std::ostringstream ostr; + ostr << "Most recent cache entry stats:\n" + << "Number of entries: " << total_entry_count << "\n" + << "Total charge: " << BytesToHumanString(total_charge) << "\n" + << "Average key size: " + << (1.0 * total_key_size / total_entry_count) << "\n" + << "Average charge: " + << BytesToHumanString(static_cast( + 1.0 * total_charge / total_entry_count)) + << "\n" + << "Unique deleters: " << deleters.size() << "\n"; + *stats_report = ostr.str(); + return; + } + if (clock->NowMicros() >= deadline) { + break; + } + uint64_t diff = deadline - std::min(clock->NowMicros(), deadline); + shared->GetCondVar()->TimedWait(diff + 1); + } + } + + // Now gather stats, outside of mutex + total_key_size = 0; + total_charge = 0; + total_entry_count = 0; + deleters.clear(); + auto fn = [&](const Slice& key, void* /*value*/, size_t charge, + Cache::DeleterFn deleter) { + total_key_size += key.size(); + total_charge += charge; + ++total_entry_count; + // Something slightly more expensive as in (future) stats by category + deleters.insert(deleter); + }; + timer.Start(); + Cache::ApplyToAllEntriesOptions opts; + opts.average_entries_per_lock = FLAGS_gather_stats_entries_per_lock; + shared->GetCacheBench()->cache_->ApplyToAllEntries(fn, opts); + stats_hist->Add(timer.ElapsedNanos() / 1000); + } + } + + static void ThreadBody(ThreadState* thread) { + SharedState* shared = thread->shared; + + { + MutexLock l(shared->GetMutex()); + shared->IncInitialized(); + if (shared->AllInitialized()) { + shared->GetCondVar()->SignalAll(); + } + while (!shared->Started()) { + shared->GetCondVar()->Wait(); + } + } + thread->shared->GetCacheBench()->OperateCache(thread); + + { + MutexLock l(shared->GetMutex()); + shared->IncDone(); + if (shared->AllDone()) { + shared->GetCondVar()->SignalAll(); + } + } + } + + void OperateCache(ThreadState* thread) { + // To use looked-up values + uint64_t result = 0; + // To hold handles for a non-trivial amount of time + Cache::Handle* handle = nullptr; + KeyGen gen; + const auto clock = SystemClock::Default().get(); + uint64_t start_time = clock->NowMicros(); + StopWatchNano timer(clock); + + for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) { + timer.Start(); + Slice key = gen.GetRand(thread->rnd, max_key_, max_log_); + uint64_t random_op = thread->rnd.Next(); + Cache::CreateCallback create_cb = + [](void* buf, size_t size, void** out_obj, size_t* charge) -> Status { + *out_obj = reinterpret_cast(new char[size]); + memcpy(*out_obj, buf, size); + *charge = size; + return Status::OK(); + }; + + if (random_op < lookup_insert_threshold_) { + if (handle) { + cache_->Release(handle); + handle = nullptr; + } + // do lookup + handle = cache_->Lookup(key, &helper2, create_cb, Cache::Priority::LOW, + true); + if (handle) { + // do something with the data + result += NPHash64(static_cast(cache_->Value(handle)), + FLAGS_value_bytes); + } else { + // do insert + cache_->Insert(key, createValue(thread->rnd), &helper2, + 
FLAGS_value_bytes, &handle); + } + } else if (random_op < insert_threshold_) { + if (handle) { + cache_->Release(handle); + handle = nullptr; + } + // do insert + cache_->Insert(key, createValue(thread->rnd), &helper3, + FLAGS_value_bytes, &handle); + } else if (random_op < lookup_threshold_) { + if (handle) { + cache_->Release(handle); + handle = nullptr; + } + // do lookup + handle = cache_->Lookup(key, &helper2, create_cb, Cache::Priority::LOW, + true); + if (handle) { + // do something with the data + result += NPHash64(static_cast(cache_->Value(handle)), + FLAGS_value_bytes); + } + } else if (random_op < erase_threshold_) { + // do erase + cache_->Erase(key); + } else { + // Should be extremely unlikely (noop) + assert(random_op >= kHundredthUint64 * 100U); + } + thread->latency_ns_hist.Add(timer.ElapsedNanos()); + } + if (handle) { + cache_->Release(handle); + handle = nullptr; + } + // Ensure computations on `result` are not optimized away. + if (result == 1) { + printf("You are extremely unlucky(2). Try again.\n"); + exit(1); + } + thread->duration_us = clock->NowMicros() - start_time; + } + + void PrintEnv() const { + printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion); + printf("Number of threads : %u\n", FLAGS_threads); + printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread); + printf("Cache size : %s\n", + BytesToHumanString(FLAGS_cache_size).c_str()); + printf("Num shard bits : %u\n", FLAGS_num_shard_bits); + printf("Max key : %" PRIu64 "\n", max_key_); + printf("Resident ratio : %g\n", FLAGS_resident_ratio); + printf("Skew degree : %u\n", FLAGS_skew); + printf("Populate cache : %d\n", int{FLAGS_populate_cache}); + printf("Lookup+Insert pct : %u%%\n", FLAGS_lookup_insert_percent); + printf("Insert percentage : %u%%\n", FLAGS_insert_percent); + printf("Lookup percentage : %u%%\n", FLAGS_lookup_percent); + printf("Erase percentage : %u%%\n", FLAGS_erase_percent); + std::ostringstream stats; + if (FLAGS_gather_stats) { + stats << "enabled (" << FLAGS_gather_stats_sleep_ms << "ms, " + << FLAGS_gather_stats_entries_per_lock << "/lock)"; + } else { + stats << "disabled"; + } + printf("Gather stats : %s\n", stats.str().c_str()); + printf("----------------------------\n"); + } +}; + +// TODO: better description (see PR #9126 for some info) +class StressCacheKey { + public: + void Run() { + if (FLAGS_sck_footer_unique_id) { + FLAGS_sck_db_count = 1; + } + + uint64_t mb_per_day = + uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_file_size_mb; + printf("Total cache or DBs size: %gTiB Writing %g MiB/s or %gTiB/day\n", + FLAGS_sck_file_size_mb / 1024.0 / 1024.0 * + std::pow(2.0, FLAGS_sck_table_bits), + mb_per_day / 86400.0, mb_per_day / 1024.0 / 1024.0); + multiplier_ = std::pow(2.0, 128 - FLAGS_sck_keep_bits) / + (FLAGS_sck_file_size_mb * 1024.0 * 1024.0); + printf( + "Multiply by %g to correct for simulation losses (but still assume " + "whole file cached)\n", + multiplier_); + restart_nfiles_ = FLAGS_sck_files_per_day / FLAGS_sck_restarts_per_day; + double without_ejection = + std::pow(1.414214, FLAGS_sck_keep_bits) / FLAGS_sck_files_per_day; + printf( + "Without ejection, expect random collision after %g days (%g " + "corrected)\n", + without_ejection, without_ejection * multiplier_); + double with_full_table = + std::pow(2.0, FLAGS_sck_keep_bits - FLAGS_sck_table_bits) / + FLAGS_sck_files_per_day; + printf( + "With ejection and full table, expect random collision after %g " + "days (%g corrected)\n", + with_full_table, with_full_table * multiplier_); + 
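With the default flags above (2.5M files/day, sck_keep_bits=50, sck_table_bits=20), the two printed estimates work out to roughly 13 days and 430 days. A standalone check of that arithmetic (illustration only, not part of the patch):

#include <cmath>
#include <cstdio>

int main() {
  const double files_per_day = 2500000.0;  // FLAGS_sck_files_per_day default
  const int keep_bits = 50;                // FLAGS_sck_keep_bits default
  const int table_bits = 20;               // FLAGS_sck_table_bits default
  // Birthday bound: a collision is expected near sqrt(2^keep_bits) keys,
  // matching the pow(1.414214, keep_bits) in Run() above.
  double without_ejection = std::pow(2.0, keep_bits / 2.0) / files_per_day;
  // With a full table of 2^table_bits tracked files, each new key collides
  // with probability 2^(table_bits - keep_bits).
  double with_full_table =
      std::pow(2.0, keep_bits - table_bits) / files_per_day;
  std::printf("without ejection: ~%.1f days; full table: ~%.0f days\n",
              without_ejection, with_full_table);
  return 0;
}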
collisions_ = 0; + + for (int i = 1; collisions_ < FLAGS_sck_min_collision; i++) { + RunOnce(); + if (collisions_ == 0) { + printf( + "No collisions after %d x %u days " + " \n", + i, FLAGS_sck_duration); + } else { + double est = 1.0 * i * FLAGS_sck_duration / collisions_; + printf("%" PRIu64 + " collisions after %d x %u days, est %g days between (%g " + "corrected) \n", + collisions_, i, FLAGS_sck_duration, est, est * multiplier_); + } + } + } + + void RunOnce() { + const size_t db_count = FLAGS_sck_db_count; + dbs_.reset(new TableProperties[db_count]{}); + const size_t table_mask = (size_t{1} << FLAGS_sck_table_bits) - 1; + table_.reset(new uint64_t[table_mask + 1]{}); + if (FLAGS_sck_keep_bits > 64) { + FLAGS_sck_keep_bits = 64; + } + uint32_t shift_away = 64 - FLAGS_sck_keep_bits; + uint32_t shift_away_b = shift_away / 3; + uint32_t shift_away_a = shift_away - shift_away_b; + + process_count_ = 0; + session_count_ = 0; + ResetProcess(); + + Random64 r{std::random_device{}()}; + + uint64_t max_file_count = + uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_duration; + uint64_t file_count = 0; + uint32_t report_count = 0; + uint32_t collisions_this_run = 0; + // Round robin through DBs + for (size_t db_i = 0;; ++db_i) { + if (db_i >= db_count) { + db_i = 0; + } + if (file_count >= max_file_count) { + break; + } + if (!FLAGS_sck_footer_unique_id && r.OneIn(FLAGS_sck_reopen_nfiles)) { + ResetSession(db_i); + } else if (r.OneIn(restart_nfiles_)) { + ResetProcess(); + } + OffsetableCacheKey ock; + dbs_[db_i].orig_file_number += 1; + // skip some file numbers, unless 1 DB so that that can simulate + // better (DB-independent) unique IDs + if (db_count > 1) { + dbs_[db_i].orig_file_number += (r.Next() & 3); + } + BlockBasedTable::SetupBaseCacheKey(&dbs_[db_i], "", 42, 42, &ock); + CacheKey ck = ock.WithOffset(0); + uint64_t stripped; + if (FLAGS_sck_randomize) { + stripped = GetSliceHash64(ck.AsSlice()) >> shift_away; + } else if (FLAGS_sck_footer_unique_id) { + uint32_t a = DecodeFixed32(ck.AsSlice().data() + 4) >> shift_away_a; + uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b; + stripped = (uint64_t{a} << 32) + b; + } else { + uint32_t a = DecodeFixed32(ck.AsSlice().data()) << shift_away_a; + uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b; + stripped = (uint64_t{a} << 32) + b; + } + if (stripped == 0) { + // Unlikely, but we need to exclude tracking this value + printf("Hit Zero! 
\n"); + continue; + } + file_count++; + uint64_t h = NPHash64(reinterpret_cast(&stripped), 8); + // Skew lifetimes + size_t pos = + std::min(Lower32of64(h) & table_mask, Upper32of64(h) & table_mask); + if (table_[pos] == stripped) { + collisions_this_run++; + // To predict probability of no collisions, we have to get rid of + // correlated collisions, which this takes care of: + ResetProcess(); + } else { + // Replace + table_[pos] = stripped; + } + + if (++report_count == FLAGS_sck_files_per_day) { + report_count = 0; + // Estimate fill % + size_t incr = table_mask / 1000; + size_t sampled_count = 0; + for (size_t i = 0; i <= table_mask; i += incr) { + if (table_[i] != 0) { + sampled_count++; + } + } + // Report + printf( + "%" PRIu64 " days, %" PRIu64 " proc, %" PRIu64 + " sess, %u coll, occ %g%%, ejected %g%% \r", + file_count / FLAGS_sck_files_per_day, process_count_, + session_count_, collisions_this_run, 100.0 * sampled_count / 1000.0, + 100.0 * (1.0 - sampled_count / 1000.0 * table_mask / file_count)); + fflush(stdout); + } + } + collisions_ += collisions_this_run; + } + + void ResetSession(size_t i) { + dbs_[i].db_session_id = DBImpl::GenerateDbSessionId(nullptr); + session_count_++; + } + + void ResetProcess() { + process_count_++; + DBImpl::TEST_ResetDbSessionIdGen(); + for (size_t i = 0; i < FLAGS_sck_db_count; ++i) { + ResetSession(i); + } + if (FLAGS_sck_footer_unique_id) { + dbs_[0].orig_file_number = 0; + } + } + + private: + // Use db_session_id and orig_file_number from TableProperties + std::unique_ptr dbs_; + std::unique_ptr table_; + uint64_t process_count_ = 0; + uint64_t session_count_ = 0; + uint64_t collisions_ = 0; + uint32_t restart_nfiles_ = 0; + double multiplier_ = 0.0; +}; + +int cache_bench_tool(int argc, char** argv) { + ParseCommandLineFlags(&argc, &argv, true); + + if (FLAGS_stress_cache_key) { + // Alternate tool + StressCacheKey().Run(); + return 0; + } + + if (FLAGS_threads <= 0) { + fprintf(stderr, "threads number <= 0\n"); + exit(1); + } + + ROCKSDB_NAMESPACE::CacheBench bench; + if (FLAGS_populate_cache) { + bench.PopulateCache(); + printf("Population complete\n"); + printf("----------------------------\n"); + } + if (bench.Run()) { + return 0; + } else { + return 1; + } +} // namespace ROCKSDB_NAMESPACE +} // namespace ROCKSDB_NAMESPACE + +#endif // GFLAGS diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_entry_roles.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_roles.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_entry_roles.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_roles.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,70 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "cache/cache_entry_roles.h" + +#include + +#include "port/lang.h" + +namespace ROCKSDB_NAMESPACE { + +std::array kCacheEntryRoleToCamelString{{ + "DataBlock", + "FilterBlock", + "FilterMetaBlock", + "DeprecatedFilterBlock", + "IndexBlock", + "OtherBlock", + "WriteBuffer", + "CompressionDictionaryBuildingBuffer", + "FilterConstruction", + "Misc", +}}; + +std::array kCacheEntryRoleToHyphenString{{ + "data-block", + "filter-block", + "filter-meta-block", + "deprecated-filter-block", + "index-block", + "other-block", + "write-buffer", + "compression-dictionary-building-buffer", + "filter-construction", + "misc", +}}; + +namespace { + +struct Registry { + std::mutex mutex; + std::unordered_map role_map; + void Register(Cache::DeleterFn fn, CacheEntryRole role) { + std::lock_guard lock(mutex); + role_map[fn] = role; + } + std::unordered_map Copy() { + std::lock_guard lock(mutex); + return role_map; + } +}; + +Registry& GetRegistry() { + STATIC_AVOID_DESTRUCTION(Registry, registry); + return registry; +} + +} // namespace + +void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role) { + GetRegistry().Register(fn, role); +} + +std::unordered_map CopyCacheDeleterRoleMap() { + return GetRegistry().Copy(); +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_entry_roles.h mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_roles.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_entry_roles.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_roles.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,134 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include +#include + +#include "rocksdb/cache.h" + +namespace ROCKSDB_NAMESPACE { + +// Classifications of block cache entries, for reporting statistics +// Adding new enum to this class requires corresponding updates to +// kCacheEntryRoleToCamelString and kCacheEntryRoleToHyphenString +enum class CacheEntryRole { + // Block-based table data block + kDataBlock, + // Block-based table filter block (full or partitioned) + kFilterBlock, + // Block-based table metadata block for partitioned filter + kFilterMetaBlock, + // Block-based table deprecated filter block (old "block-based" filter) + kDeprecatedFilterBlock, + // Block-based table index block + kIndexBlock, + // Other kinds of block-based table block + kOtherBlock, + // WriteBufferManager reservations to account for memtable usage + kWriteBuffer, + // BlockBasedTableBuilder reservations to account for + // compression dictionary building buffer's memory usage + kCompressionDictionaryBuildingBuffer, + // Filter reservations to account for + // (new) bloom and ribbon filter construction's memory usage + kFilterConstruction, + // Default bucket, for miscellaneous cache entries. Do not use for + // entries that could potentially add up to large usage. + kMisc, +}; +constexpr uint32_t kNumCacheEntryRoles = + static_cast(CacheEntryRole::kMisc) + 1; + +extern std::array + kCacheEntryRoleToCamelString; +extern std::array + kCacheEntryRoleToHyphenString; + +// To associate cache entries with their role, we use a hack on the +// existing Cache interface. 
Because the deleter of an entry can authenticate +// the code origin of an entry, we can elaborate the choice of deleter to +// also encode role information, without inferring false role information +// from entries not choosing to encode a role. +// +// The rest of this file is for handling mappings between deleters and +// roles. + +// To infer a role from a deleter, the deleter must be registered. This +// can be done "manually" with this function. This function is thread-safe, +// and the registration mappings go into private but static storage. (Note +// that DeleterFn is a function pointer, not std::function. Registrations +// should not be too many.) +void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role); + +// Gets a copy of the registered deleter -> role mappings. This is the only +// function for reading the mappings made with RegisterCacheDeleterRole. +// Why only this interface for reading? +// * This function has to be thread safe, which could incur substantial +// overhead. We should not pay this overhead for every deleter look-up. +// * This is suitable for preparing for batch operations, like with +// CacheEntryStatsCollector. +// * The number of mappings should be sufficiently small (dozens). +std::unordered_map CopyCacheDeleterRoleMap(); + +// ************************************************************** // +// An automatic registration infrastructure. This enables code +// to simply ask for a deleter associated with a particular type +// and role, and registration is automatic. In a sense, this is +// a small dependency injection infrastructure, because linking +// in new deleter instantiations is essentially sufficient for +// making stats collection (using CopyCacheDeleterRoleMap) aware +// of them. + +namespace cache_entry_roles_detail { + +template +struct RegisteredDeleter { + RegisteredDeleter() { RegisterCacheDeleterRole(Delete, R); } + + // These have global linkage to help ensure compiler optimizations do not + // break uniqueness for each + static void Delete(const Slice& /* key */, void* value) { + // Supports T == Something[], unlike delete operator + std::default_delete()( + static_cast::type*>(value)); + } +}; + +template +struct RegisteredNoopDeleter { + RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); } + + static void Delete(const Slice& /* key */, void* /* value */) { + // Here was `assert(value == nullptr);` but we can also put pointers + // to static data in Cache, for testing at least. + } +}; + +} // namespace cache_entry_roles_detail + +// Get an automatically registered deleter for value type T and role R. +// Based on C++ semantics, registration is invoked exactly once in a +// thread-safe way on first call to this function, for each . +template +Cache::DeleterFn GetCacheEntryDeleterForRole() { + static cache_entry_roles_detail::RegisteredDeleter reg; + return reg.Delete; +} + +// Get an automatically registered no-op deleter (value should be nullptr) +// and associated with role R. This is used for Cache "reservation" entries +// such as for WriteBufferManager. 
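Putting GetCacheEntryDeleterForRole() to use: an insertion tags its entry's role just by choosing the deleter, which consumers of CopyCacheDeleterRoleMap() can later classify. A hedged sketch (internal header; InsertMisc is a hypothetical helper, not part of the patch):

#include "cache/cache_entry_roles.h"  // internal header, for illustration
#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

// Insert a buffer charged to the "misc" role; the deleter both frees the
// value and identifies the entry's role to stats-gathering code.
Status InsertMisc(Cache* cache, const Slice& key, size_t len) {
  Cache::DeleterFn deleter =
      GetCacheEntryDeleterForRole<char[], CacheEntryRole::kMisc>();
  return cache->Insert(key, new char[len], /*charge=*/len, deleter);
}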
+template <CacheEntryRole R> +Cache::DeleterFn GetNoopDeleterForRole() { + static cache_entry_roles_detail::RegisteredNoopDeleter<R> reg; + return reg.Delete; +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_entry_stats.h mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_stats.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_entry_stats.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_entry_stats.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,183 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include <array> +#include <cstdint> +#include <memory> +#include <mutex> + +#include "cache/cache_helpers.h" +#include "cache/cache_key.h" +#include "port/lang.h" +#include "rocksdb/cache.h" +#include "rocksdb/status.h" +#include "rocksdb/system_clock.h" +#include "test_util/sync_point.h" +#include "util/coding_lean.h" + +namespace ROCKSDB_NAMESPACE { + +// A generic helper object for gathering stats about cache entries by +// iterating over them with ApplyToAllEntries. This class essentially +// solves the problem of slowing down a Cache with too many stats +// collectors that could be sharing stat results, such as from multiple +// column families or multiple DBs sharing a Cache. We employ a few +// mitigations: +// * Only one collector for a particular kind of Stats is alive +// for each Cache. This is guaranteed using the Cache itself to hold +// the collector. +// * A mutex ensures only one thread is gathering stats for this +// collector. +// * The most recent gathered stats are saved and simply copied to +// satisfy requests within a time window (default: 3 minutes) of +// completion of the most recent stat gathering. +// +// Template parameter Stats must be copyable and trivially constructible, +// as well as... +// concept Stats { +// // Notification before applying callback to all entries +// void BeginCollection(Cache*, SystemClock*, uint64_t start_time_micros); +// // Get the callback to apply to all entries. `callback` +// // type must be compatible with Cache::ApplyToAllEntries +// callback GetEntryCallback(); +// // Notification after applying callback to all entries +// void EndCollection(Cache*, SystemClock*, uint64_t end_time_micros); +// // Notification that a collection was skipped because of +// // sufficiently recent saved results. +// void SkippedCollection(); +// } +template <class Stats> +class CacheEntryStatsCollector { + public: + // Gather and save stats if saved stats are too old. (Use GetStats() to + // read saved stats.) + // + // Maximum allowed age for a "hit" on saved results is determined by the + // two interval parameters. Both set to 0 forces a re-scan. For example + // with min_interval_seconds=300 and min_interval_factor=100, if the last + // scan took 10s, we would only rescan ("miss") if the age in seconds of + // the saved results is > max(300, 100*10). + // Justification: scans can vary wildly in duration, e.g. from 0.02 sec + // to as much as 20 seconds, so we want to be able to cap the absolute + // and relative frequency of scans.
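Nothing enforces the documented Stats "concept" at compile time; for orientation, a minimal conforming type might look like the following (hypothetical, counts entries only; illustration, not part of the patch):

#include <cstdint>
#include <functional>

#include "rocksdb/cache.h"
#include "rocksdb/system_clock.h"

namespace {
// Hypothetical Stats type satisfying the documented concept.
struct EntryCountStats {
  uint64_t entry_count = 0;
  uint64_t last_start_micros = 0;
  uint64_t last_end_micros = 0;
  uint64_t skipped = 0;

  void BeginCollection(ROCKSDB_NAMESPACE::Cache*,
                       ROCKSDB_NAMESPACE::SystemClock*,
                       uint64_t start_time_micros) {
    entry_count = 0;
    last_start_micros = start_time_micros;
  }
  // Callback type must be compatible with Cache::ApplyToAllEntries.
  std::function<void(const ROCKSDB_NAMESPACE::Slice&, void*, size_t,
                     ROCKSDB_NAMESPACE::Cache::DeleterFn)>
  GetEntryCallback() {
    return [this](const ROCKSDB_NAMESPACE::Slice&, void*, size_t,
                  ROCKSDB_NAMESPACE::Cache::DeleterFn) { ++entry_count; };
  }
  void EndCollection(ROCKSDB_NAMESPACE::Cache*,
                     ROCKSDB_NAMESPACE::SystemClock*,
                     uint64_t end_time_micros) {
    last_end_micros = end_time_micros;
  }
  void SkippedCollection() { ++skipped; }
};
// Usage would then be:
//   CacheEntryStatsCollector<EntryCountStats>::GetShared(cache, clock, &ptr);
}  // namespace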
+ void CollectStats(int min_interval_seconds, int min_interval_factor) { + // Waits for any pending reader or writer (collector) + std::lock_guard<std::mutex> lock(working_mutex_); + + uint64_t max_age_micros = + static_cast<uint64_t>(std::max(min_interval_seconds, 0)) * 1000000U; + + if (last_end_time_micros_ > last_start_time_micros_ && + min_interval_factor > 0) { + max_age_micros = std::max( + max_age_micros, min_interval_factor * (last_end_time_micros_ - + last_start_time_micros_)); + } + + uint64_t start_time_micros = clock_->NowMicros(); + if ((start_time_micros - last_end_time_micros_) > max_age_micros) { + last_start_time_micros_ = start_time_micros; + working_stats_.BeginCollection(cache_, clock_, start_time_micros); + + cache_->ApplyToAllEntries(working_stats_.GetEntryCallback(), {}); + TEST_SYNC_POINT_CALLBACK( + "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries", nullptr); + + uint64_t end_time_micros = clock_->NowMicros(); + last_end_time_micros_ = end_time_micros; + working_stats_.EndCollection(cache_, clock_, end_time_micros); + } else { + working_stats_.SkippedCollection(); + } + + // Save so that we don't need to wait for an outstanding collection in + // order to make a copy of the last saved stats + std::lock_guard<std::mutex> lock2(saved_mutex_); + saved_stats_ = working_stats_; + } + + // Gets saved stats, regardless of age + void GetStats(Stats *stats) { + std::lock_guard<std::mutex> lock(saved_mutex_); + *stats = saved_stats_; + } + + Cache *GetCache() const { return cache_; } + + // Gets or creates a shared instance of CacheEntryStatsCollector in the + // cache itself, and saves into `ptr`. This shared_ptr will hold the + // entry in cache until all refs are destroyed. + static Status GetShared(Cache *cache, SystemClock *clock, + std::shared_ptr<CacheEntryStatsCollector> *ptr) { + const Slice &cache_key = GetCacheKey(); + + Cache::Handle *h = cache->Lookup(cache_key); + if (h == nullptr) { + // Not yet in cache, but Cache doesn't provide a built-in way to + // avoid racing insert. So we double-check under a shared mutex, + // inspired by TableCache. + STATIC_AVOID_DESTRUCTION(std::mutex, static_mutex); + std::lock_guard<std::mutex> lock(static_mutex); + + h = cache->Lookup(cache_key); + if (h == nullptr) { + auto new_ptr = new CacheEntryStatsCollector(cache, clock); + // TODO: non-zero charge causes some tests that count block cache + // usage to go flaky. Fix the problem somehow so we can use an + // accurate charge. + size_t charge = 0; + Status s = cache->Insert(cache_key, new_ptr, charge, Deleter, &h, + Cache::Priority::HIGH); + if (!s.ok()) { + assert(h == nullptr); + delete new_ptr; + return s; + } + } + } + // If we reach here, shared entry is in cache with handle `h`. + assert(cache->GetDeleter(h) == Deleter); + + // Build an aliasing shared_ptr that keeps `ptr` in cache while there + // are references.
+ *ptr = MakeSharedCacheHandleGuard(cache, h); + return Status::OK(); + } + + private: + explicit CacheEntryStatsCollector(Cache *cache, SystemClock *clock) + : saved_stats_(), + working_stats_(), + last_start_time_micros_(0), + last_end_time_micros_(/*pessimistic*/ 10000000), + cache_(cache), + clock_(clock) {} + + static void Deleter(const Slice &, void *value) { + delete static_cast(value); + } + + static const Slice &GetCacheKey() { + // For each template instantiation + static CacheKey ckey = CacheKey::CreateUniqueForProcessLifetime(); + static Slice ckey_slice = ckey.AsSlice(); + return ckey_slice; + } + + std::mutex saved_mutex_; + Stats saved_stats_; + + std::mutex working_mutex_; + Stats working_stats_; + uint64_t last_start_time_micros_; + uint64_t last_end_time_micros_; + + Cache *const cache_; + SystemClock *const clock_; +}; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_helpers.h mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_helpers.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_helpers.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_helpers.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,125 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/cache.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Returns the cached value given a cache handle. +template +T* GetFromCacheHandle(Cache* cache, Cache::Handle* handle) { + assert(cache); + assert(handle); + + return static_cast(cache->Value(handle)); +} + +// Simple generic deleter for Cache (to be used with Cache::Insert). +template +void DeleteCacheEntry(const Slice& /* key */, void* value) { + delete static_cast(value); +} + +// Turns a T* into a Slice so it can be used as a key with Cache. +template +Slice GetSlice(const T* t) { + return Slice(reinterpret_cast(t), sizeof(T)); +} + +// Generic resource management object for cache handles that releases the handle +// when destroyed. Has unique ownership of the handle, so copying it is not +// allowed, while moving it transfers ownership. 
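A usage sketch for the guard defined just below: wrapping a lookup so that every path out of the caller releases the handle. LookupGuarded is a hypothetical helper, and the cached value type is assumed to be std::string (illustration only, not part of the patch):

#include <string>

#include "cache/cache_helpers.h"  // internal header, for illustration
#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

// Look up `key` and wrap the handle; returns an empty guard on miss.
CacheHandleGuard<std::string> LookupGuarded(Cache* cache, const Slice& key) {
  Cache::Handle* handle = cache->Lookup(key);
  if (handle == nullptr) {
    return CacheHandleGuard<std::string>();
  }
  // The guard releases the handle when it goes out of scope.
  return CacheHandleGuard<std::string>(cache, handle);
}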
+template +class CacheHandleGuard { + public: + CacheHandleGuard() = default; + + CacheHandleGuard(Cache* cache, Cache::Handle* handle) + : cache_(cache), + handle_(handle), + value_(GetFromCacheHandle(cache, handle)) { + assert(cache_ && handle_ && value_); + } + + CacheHandleGuard(const CacheHandleGuard&) = delete; + CacheHandleGuard& operator=(const CacheHandleGuard&) = delete; + + CacheHandleGuard(CacheHandleGuard&& rhs) noexcept + : cache_(rhs.cache_), handle_(rhs.handle_), value_(rhs.value_) { + assert((!cache_ && !handle_ && !value_) || (cache_ && handle_ && value_)); + + rhs.ResetFields(); + } + + CacheHandleGuard& operator=(CacheHandleGuard&& rhs) noexcept { + if (this == &rhs) { + return *this; + } + + ReleaseHandle(); + + cache_ = rhs.cache_; + handle_ = rhs.handle_; + value_ = rhs.value_; + + assert((!cache_ && !handle_ && !value_) || (cache_ && handle_ && value_)); + + rhs.ResetFields(); + + return *this; + } + + ~CacheHandleGuard() { ReleaseHandle(); } + + bool IsEmpty() const { return !handle_; } + + Cache* GetCache() const { return cache_; } + Cache::Handle* GetCacheHandle() const { return handle_; } + T* GetValue() const { return value_; } + + void Reset() { + ReleaseHandle(); + ResetFields(); + } + + private: + void ReleaseHandle() { + if (IsEmpty()) { + return; + } + + assert(cache_); + cache_->Release(handle_); + } + + void ResetFields() { + cache_ = nullptr; + handle_ = nullptr; + value_ = nullptr; + } + + private: + Cache* cache_ = nullptr; + Cache::Handle* handle_ = nullptr; + T* value_ = nullptr; +}; + +// Build an aliasing shared_ptr that keeps `handle` in cache while there +// are references, but the pointer is to the value for that cache entry, +// which must be of type T. This is copyable, unlike CacheHandleGuard, but +// does not provide access to caching details. +template +std::shared_ptr MakeSharedCacheHandleGuard(Cache* cache, + Cache::Handle* handle) { + auto wrapper = std::make_shared>(cache, handle); + return std::shared_ptr(wrapper, static_cast(cache->Value(handle))); +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_key.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_key.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_key.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_key.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,271 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
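For orientation before the details of the new cache/cache_key.cc: the intended pattern is one OffsetableCacheKey per table file, then a cheap fixed-size CacheKey per block derived from it. A hedged sketch (internal header; all argument values are made up):

#include <cstdint>

#include "cache/cache_key.h"  // internal header, for illustration

using namespace ROCKSDB_NAMESPACE;

void Example() {
  // One base key per table file (values here are placeholders)...
  OffsetableCacheKey base(/*db_id=*/"1234", /*db_session_id=*/"ABCDEF",
                          /*file_number=*/42,
                          /*max_offset=*/uint64_t{1} << 22);
  // ...then a 16-byte block cache key per block, derived by offset.
  CacheKey block_key = base.WithOffset(/*offset=*/8192);
  Slice key_slice = block_key.AsSlice();  // usable with Cache::Lookup/Insert
  (void)key_slice;
}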
+ +#include "cache/cache_key.h" + +#include +#include + +#include "rocksdb/cache.h" +#include "table/unique_id_impl.h" +#include "util/hash.h" +#include "util/math.h" + +namespace ROCKSDB_NAMESPACE { + +// Value space plan for CacheKey: +// +// session_etc64_ | offset_etc64_ | Only generated by +// ---------------+---------------+------------------------------------------ +// 0 | 0 | Reserved for "empty" CacheKey() +// 0 | > 0, < 1<<63 | CreateUniqueForCacheLifetime +// 0 | >= 1<<63 | CreateUniqueForProcessLifetime +// > 0 | any | OffsetableCacheKey.WithOffset + +CacheKey CacheKey::CreateUniqueForCacheLifetime(Cache *cache) { + // +1 so that we can reserve all zeros for "unset" cache key + uint64_t id = cache->NewId() + 1; + // Ensure we don't collide with CreateUniqueForProcessLifetime + assert((id >> 63) == 0U); + return CacheKey(0, id); +} + +CacheKey CacheKey::CreateUniqueForProcessLifetime() { + // To avoid colliding with CreateUniqueForCacheLifetime, assuming + // Cache::NewId counts up from zero, here we count down from UINT64_MAX. + // If this ever becomes a point of contention, we could use CoreLocalArray. + static std::atomic counter{UINT64_MAX}; + uint64_t id = counter.fetch_sub(1, std::memory_order_relaxed); + // Ensure we don't collide with CreateUniqueForCacheLifetime + assert((id >> 63) == 1U); + return CacheKey(0, id); +} + +// Value plan for CacheKeys from OffsetableCacheKey, assuming that +// db_session_ids are generated from a base_session_id and +// session_id_counter (by SemiStructuredUniqueIdGen+EncodeSessionId +// in DBImpl::GenerateDbSessionId): +// +// Conceptual inputs: +// db_id (unstructured, from GenerateRawUniqueId or equiv) +// * could be shared between cloned DBs but rare +// * could be constant, if session id suffices +// base_session_id (unstructured, from GenerateRawUniqueId) +// session_id_counter (structured) +// * usually much smaller than 2**24 +// file_number (structured) +// * usually smaller than 2**24 +// offset_in_file (structured, might skip lots of values) +// * usually smaller than 2**32 +// max_offset determines placement of file_number to prevent +// overlapping with offset +// +// Outputs come from bitwise-xor of the constituent pieces, low bits on left: +// +// |------------------------- session_etc64 -------------------------| +// | +++++++++++++++ base_session_id (lower 64 bits) +++++++++++++++ | +// |-----------------------------------------------------------------| +// | session_id_counter ...| | +// |-----------------------------------------------------------------| +// | | ... file_number | +// | | overflow & meta | +// |-----------------------------------------------------------------| +// +// +// |------------------------- offset_etc64 --------------------------| +// | hash of: ++++++++++++++++++++++++++++++++++++++++++++++++++++++ | +// | * base_session_id (upper ~39 bits) | +// | * db_id (~122 bits entropy) | +// |-----------------------------------------------------------------| +// | offset_in_file ............... | | +// |-----------------------------------------------------------------| +// | | file_number, 0-3 | +// | | lower bytes | +// |-----------------------------------------------------------------| +// +// Based on max_offset, a maximal number of bytes 0..3 is chosen for +// including from lower bits of file_number in offset_etc64. 
+// The choice is encoded in two bits of metadata going into session_etc64,
+// though the common case of 3 bytes is encoded as 0 so that session_etc64
+// is unmodified by file_number concerns in the common case.
+//
+// There is nothing preventing "file number overflow & meta" from meeting
+// and overlapping with session_id_counter, but reaching such a case requires
+// an intractable combination of large file offsets (thus at least some large
+// files), large file numbers (thus large number of files generated), and
+// large number of session IDs generated in a single process. A trillion each
+// (2**40) of session ids, offsets, and file numbers comes to 120 bits.
+// With two bits of metadata and byte granularity, this is on the verge of
+// overlap, but even in the overlap case, it doesn't seem likely that
+// a file from billions of files or session ids ago will still be live
+// or cached.
+//
+// In fact, if our SST files are all < 4TB (see
+// BlockBasedTable::kMaxFileSizeStandardEncoding), then SST files generated
+// in a single process are guaranteed to have unique cache keys, unless/until
+// the number of session ids times the max file number reaches 2**86, e.g.
+// 1 trillion DB::Open in a single process and 64 trillion files generated.
+// Even at that point, to see a collision we would need a miraculous
+// re-synchronization of session id and file number, along with a live file
+// or stale cache entry from trillions of files ago.
+//
+// How https://github.com/pdillinger/unique_id applies here:
+// Every bit of output always includes "unstructured" uniqueness bits and
+// often combines with "structured" uniqueness bits. The "unstructured" bits
+// change infrequently: only when we cannot guarantee our state tracking for
+// "structured" uniqueness hasn't been cloned. Using a static
+// SemiStructuredUniqueIdGen for db_session_ids, this means we only get an
+// "all new" session id when a new process uses RocksDB. (Between processes,
+// we don't know if a DB or other persistent storage has been cloned.) Within
+// a process, only the session_lower of the db_session_id changes
+// incrementally ("structured" uniqueness).
+//
+// This basically means that our offsets, counters and file numbers allow us
+// to do somewhat "better than random" (birthday paradox) while in the
+// degenerate case of completely new session for each tiny file, we still
+// have strong uniqueness properties from the birthday paradox, with ~103
+// bit session IDs or up to 128 bits entropy with different DB IDs sharing a
+// cache.
+//
+// More collision probability analysis:
+// Suppose a RocksDB host generates (generously) 2 GB/s (10TB data, 17 DWPD)
+// with average process/session lifetime of (pessimistically) 4 minutes.
+// In 180 days (generous allowable data lifespan), we generate 31 million GB
+// of data, or 2^55 bytes, and 2^16 "all new" session IDs.
+//
+// First, suppose this is in a single DB (lifetime 180 days):
+// 128 bits cache key size
+// - 55 <- ideal size for byte offsets + file numbers
+// -  2 <- bits for offsets and file numbers not exactly powers of two
+// -  2 <- bits for file number encoding metadata
+// +  2 <- bits saved not using byte offsets in BlockBasedTable::GetCacheKey
+// ----
+//  71 <- bits remaining for distinguishing session IDs
+// The probability of a collision in 71 bits of session ID data is less than
+// 1 in 2**(71 - (2 * 16)), or roughly 1 in a trillion.
+// And this assumes all data from the last 180 days is in cache for
+// potential collision, and that cache keys under each session id
+// exhaustively cover the remaining 57 bits while in reality they'll only
+// cover a small fraction of it.
+//
+// Although data could be transferred between hosts, each host has its own
+// cache and we are already assuming a high rate of "all new" session ids.
+// So this doesn't really change the collision calculation. Across a fleet
+// of 1 million, each with <1 in a trillion collision possibility,
+// fleetwide collision probability is <1 in a million.
+//
+// Now suppose we have many DBs per host, say 2**10, with same host-wide write
+// rate and process/session lifetime. File numbers will be ~10 bits smaller
+// and we will have 2**10 times as many session IDs because of simultaneous
+// lifetimes. So now collision chance is less than 1 in 2**(81 - (2 * 26)),
+// or roughly 1 in a billion.
+//
+// Suppose instead we generated random or hashed cache keys for each
+// (compressed) block. For 1KB compressed block size, that is 2^45 cache keys
+// in 180 days. Collision probability is more easily estimated at roughly
+// 1 in 2**(128 - (2 * 45)) or roughly 1 in a trillion (assuming all
+// data from the last 180 days is in cache, but NOT the other assumption
+// for the 1 in a trillion estimate above).
+//
+// Conclusion: Burning through session IDs, particularly "all new" IDs that
+// only arise when a new process is started, is the only way to have a
+// plausible chance of cache key collision. When processes live for hours
+// or days, the chance of a cache key collision seems more plausibly due
+// to bad hardware than to bad luck in random session ID data.
+//
+OffsetableCacheKey::OffsetableCacheKey(const std::string &db_id,
+                                       const std::string &db_session_id,
+                                       uint64_t file_number,
+                                       uint64_t max_offset) {
+#ifndef NDEBUG
+  max_offset_ = max_offset;
+#endif
+  // Closely related to GetSstInternalUniqueId, but only need 128 bits and
+  // need to include an offset within the file.
+  // See also https://github.com/pdillinger/unique_id for background.
+  uint64_t session_upper = 0;  // Assignment to appease clang-analyze
+  uint64_t session_lower = 0;  // Assignment to appease clang-analyze
+  {
+    Status s = DecodeSessionId(db_session_id, &session_upper, &session_lower);
+    if (!s.ok()) {
+      // A reasonable fallback in case malformed
+      Hash2x64(db_session_id.data(), db_session_id.size(), &session_upper,
+               &session_lower);
+    }
+  }
+
+  // Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy)
+  // for more global uniqueness entropy.
+  // (It is possible that many DBs descended from one common DB id are copied
+  // around and proliferate, in which case session id is critical, but it is
+  // more common for different DBs to have different DB ids.)
+  uint64_t db_hash = Hash64(db_id.data(), db_id.size(), session_upper);
+
+  // This establishes the db+session id part of the cache key.
+  //
+  // Exactly preserve (in common cases; see modifiers below) session lower to
+  // ensure that session ids generated during the same process lifetime are
+  // guaranteed unique.
+  //
+  // We put this first for CommonPrefixSlice(), so that a small-ish set of
+  // cache key prefixes can cover entries relevant to any DB.
+  session_etc64_ = session_lower;
+  // This provides extra entropy in case of a different DB id or process
+  // generating a session id, but is also partly/variably obscured by
+  // file_number and offset (see below).
+  offset_etc64_ = db_hash;
+
+  // Into offset_etc64_ we are (eventually) going to pack & xor in an offset
+  // and a file_number, but we might need the file_number to overflow into
+  // session_etc64_. (There must only be one session_etc64_ value per
+  // file, and preferably shared among many files.)
+  //
+  // Figure out how many bytes of file_number we are going to be able to
+  // pack in with max_offset, though our encoding will only support packing
+  // in up to 3 bytes of file_number. (16M file numbers is enough for a new
+  // file number every second for half a year.)
+  int file_number_bytes_in_offset_etc =
+      (63 - FloorLog2(max_offset | 0x100000000U)) / 8;
+  int file_number_bits_in_offset_etc = file_number_bytes_in_offset_etc * 8;
+
+  // Assert two bits of metadata
+  assert(file_number_bytes_in_offset_etc >= 0 &&
+         file_number_bytes_in_offset_etc <= 3);
+  // Assert we couldn't have used a larger allowed number of bytes (shift
+  // would chop off bytes).
+  assert(file_number_bytes_in_offset_etc == 3 ||
+         (max_offset << (file_number_bits_in_offset_etc + 8) >>
+          (file_number_bits_in_offset_etc + 8)) != max_offset);
+
+  uint64_t mask = (uint64_t{1} << (file_number_bits_in_offset_etc)) - 1;
+  // Pack into high bits of etc so that offset can go in low bits of etc
+  // TODO: could be EndianSwapValue?
+  uint64_t offset_etc_modifier = ReverseBits(file_number & mask);
+  assert(offset_etc_modifier << file_number_bits_in_offset_etc == 0U);
+
+  // Overflow and 3 - byte count (likely both zero) go into session_id part
+  uint64_t session_etc_modifier =
+      (file_number >> file_number_bits_in_offset_etc << 2) |
+      static_cast<uint64_t>(3 - file_number_bytes_in_offset_etc);
+  // Packed into high bits to minimize interference with session id counter.
+  session_etc_modifier = ReverseBits(session_etc_modifier);
+
+  // Assert session_id part is only modified in extreme cases
+  assert(session_etc_modifier == 0 || file_number > /*3 bytes*/ 0xffffffU ||
+         max_offset > /*5 bytes*/ 0xffffffffffU);
+
+  // Xor in the modifiers
+  session_etc64_ ^= session_etc_modifier;
+  offset_etc64_ ^= offset_etc_modifier;
+
+  // Although DBImpl guarantees (in recent versions) that session_lower is
+  // not zero, that's not entirely sufficient to guarantee that
+  // session_etc64_ is not zero (so that the 0 case can be used by
+  // CacheKey::CreateUnique*)
+  if (session_etc64_ == 0U) {
+    session_etc64_ = session_upper | 1U;
+  }
+  assert(session_etc64_ != 0);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
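As a quick cross-check of the collision arithmetic in the comments above (an editorial aside, not part of the diff): with $n = 2^{16}$ "all new" session IDs landing in $b = 71$ effective key bits, the birthday bound gives

$$P(\text{collision}) \approx \frac{n(n-1)}{2 \cdot 2^{b}} < 2^{2 \cdot 16 - 71} = 2^{-39} \approx \frac{1}{5 \times 10^{11}},$$

which is consistent with the "roughly 1 in a trillion" figure quoted in the comment.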
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_key.h mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_key.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_key.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_key.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,132 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cstdint>
+
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/slice.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Cache;
+
+// A standard holder for fixed-size block cache keys (and for related caches).
+// They are created through one of these, each using its own range of values:
+// * CacheKey::CreateUniqueForCacheLifetime
+// * CacheKey::CreateUniqueForProcessLifetime
+// * Default ctor ("empty" cache key)
+// * OffsetableCacheKey->WithOffset
+//
+// The first two use atomic counters to guarantee uniqueness over the given
+// lifetime and the last uses a form of universally unique identifier for
+// uniqueness with very high probability (and guaranteed for files generated
+// during a single process lifetime).
+//
+// CacheKeys are currently used by calling AsSlice() to pass as a key to
+// Cache. For performance, the keys are endianness-dependent (though
+// otherwise portable). (Persistable cache entries are not intended to cross
+// platforms.)
+class CacheKey {
+ public:
+  // For convenience, constructs an "empty" cache key that is never returned
+  // by other means.
+  inline CacheKey() : session_etc64_(), offset_etc64_() {}
+
+  inline bool IsEmpty() const {
+    return (session_etc64_ == 0) & (offset_etc64_ == 0);
+  }
+
+  // Use this cache key as a Slice (byte order is endianness-dependent)
+  inline Slice AsSlice() const {
+    static_assert(sizeof(*this) == 16, "Standardized on 16-byte cache key");
+    assert(!IsEmpty());
+    return Slice(reinterpret_cast<const char *>(this), sizeof(*this));
+  }
+
+  // Create a CacheKey that is unique among others associated with this Cache
+  // instance. Depends on Cache::NewId. This is useful for block cache
+  // "reservations".
+  static CacheKey CreateUniqueForCacheLifetime(Cache *cache);
+
+  // Create a CacheKey that is unique among others for the lifetime of this
+  // process. This is useful for saving in a static data member so that
+  // different DB instances can agree on a cache key for shared entities,
+  // such as for CacheEntryStatsCollector.
+  static CacheKey CreateUniqueForProcessLifetime();
+
+ protected:
+  friend class OffsetableCacheKey;
+  CacheKey(uint64_t session_etc64, uint64_t offset_etc64)
+      : session_etc64_(session_etc64), offset_etc64_(offset_etc64) {}
+  uint64_t session_etc64_;
+  uint64_t offset_etc64_;
+};
+
+// A file-specific generator of cache keys, sometimes referred to as the
+// "base" cache key for a file because all the cache keys for various offsets
+// within the file are computed using simple arithmetic. The basis for the
+// general approach is discussed here: https://github.com/pdillinger/unique_id
+// Heavily related to GetUniqueIdFromTableProperties.
+//
+// If the db_id, db_session_id, and file_number come from the file's table
+// properties, then the keys will be stable across DB::Open/Close, backup/
+// restore, import/export, etc.
+//
+// This class "is a" CacheKey only privately so that it is not misused as
+// a ready-to-use CacheKey.
+class OffsetableCacheKey : private CacheKey {
+ public:
+  // For convenience, constructs an "empty" cache key that should not be used.
+  inline OffsetableCacheKey() : CacheKey() {}
+
+  // Constructs an OffsetableCacheKey with the given information about a file.
+  // max_offset is based on file size (see WithOffset) and is required here to
+  // choose an appropriate (sub-)encoding. This constructor never generates an
+  // "empty" base key.
+  OffsetableCacheKey(const std::string &db_id,
+                     const std::string &db_session_id, uint64_t file_number,
+                     uint64_t max_offset);
+
+  inline bool IsEmpty() const {
+    bool result = session_etc64_ == 0;
+    assert(!(offset_etc64_ > 0 && result));
+    return result;
+  }
+
+  // Construct a CacheKey for an offset within a file, which must be
+  // <= max_offset provided in constructor. An offset is not necessarily a
+  // byte offset if a smaller unique identifier of keyable offsets is used.
+  //
+  // This class was designed to make this hot code extremely fast.
+  inline CacheKey WithOffset(uint64_t offset) const {
+    assert(!IsEmpty());
+    assert(offset <= max_offset_);
+    return CacheKey(session_etc64_, offset_etc64_ ^ offset);
+  }
+
+  // The "common prefix" is a shared prefix for all the returned CacheKeys.
+  // It also happens to usually be the same among many files in the same DB,
+  // so it is efficient and highly (though not perfectly) accurate for
+  // DB-specific cache dump selection (but not file-specific).
+  static constexpr size_t kCommonPrefixSize = 8;
+  inline Slice CommonPrefixSlice() const {
+    static_assert(sizeof(session_etc64_) == kCommonPrefixSize,
+                  "8 byte common prefix expected");
+    assert(!IsEmpty());
+    assert(&this->session_etc64_ == static_cast<const void *>(this));
+
+    return Slice(reinterpret_cast<const char *>(this), kCommonPrefixSize);
+  }
+
+  // For any max_offset <= this value, the same encoding scheme is guaranteed.
+  static constexpr uint64_t kMaxOffsetStandardEncoding = 0xffffffffffU;
+
+ private:
+#ifndef NDEBUG
+  uint64_t max_offset_ = 0;
+#endif
+};
+
+}  // namespace ROCKSDB_NAMESPACE
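To make the encoding concrete (an illustrative aside, not part of the diff; `db_id`, `db_session_id`, `file_size` and `block_offset` are placeholder values): a base key is computed once per file, and each per-block key is then a single XOR.

    // Sketch only: one base key per SST file, cheap per-block keys.
    OffsetableCacheKey base(db_id, db_session_id, /*file_number=*/42,
                            /*max_offset=*/file_size);
    CacheKey key = base.WithOffset(block_offset);  // block_offset <= file_size
    Slice cache_key = key.AsSlice();               // 16-byte key for Cache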
+#include "cache/cache_reservation_manager.h" + +#include +#include +#include +#include + +#include "cache/cache_entry_roles.h" +#include "rocksdb/cache.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "table/block_based/block_based_table_reader.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { +CacheReservationManager::CacheReservationManager(std::shared_ptr cache, + bool delayed_decrease) + : delayed_decrease_(delayed_decrease), + cache_allocated_size_(0), + memory_used_(0) { + assert(cache != nullptr); + cache_ = cache; +} + +CacheReservationManager::~CacheReservationManager() { + for (auto* handle : dummy_handles_) { + cache_->Release(handle, true); + } +} + +template +Status CacheReservationManager::UpdateCacheReservation( + std::size_t new_mem_used) { + memory_used_ = new_mem_used; + std::size_t cur_cache_allocated_size = + cache_allocated_size_.load(std::memory_order_relaxed); + if (new_mem_used == cur_cache_allocated_size) { + return Status::OK(); + } else if (new_mem_used > cur_cache_allocated_size) { + Status s = IncreaseCacheReservation(new_mem_used); + return s; + } else { + // In delayed decrease mode, we don't decrease cache reservation + // untill the memory usage is less than 3/4 of what we reserve + // in the cache. + // We do this because + // (1) Dummy entry insertion is expensive in block cache + // (2) Delayed releasing previously inserted dummy entries can save such + // expensive dummy entry insertion on memory increase in the near future, + // which is likely to happen when the memory usage is greater than or equal + // to 3/4 of what we reserve + if (delayed_decrease_ && new_mem_used >= cur_cache_allocated_size / 4 * 3) { + return Status::OK(); + } else { + Status s = DecreaseCacheReservation(new_mem_used); + return s; + } + } +} + +// Explicitly instantiate templates for "CacheEntryRole" values we use. +// This makes it possible to keep the template definitions in the .cc file. 
+
+// Explicitly instantiate templates for "CacheEntryRole" values we use.
+// This makes it possible to keep the template definitions in the .cc file.
+template Status CacheReservationManager::UpdateCacheReservation<
+    CacheEntryRole::kWriteBuffer>(std::size_t new_mem_used);
+template Status CacheReservationManager::UpdateCacheReservation<
+    CacheEntryRole::kCompressionDictionaryBuildingBuffer>(
+    std::size_t new_mem_used);
+// For cache reservation manager unit tests
+template Status CacheReservationManager::UpdateCacheReservation<
+    CacheEntryRole::kMisc>(std::size_t new_mem_used);
+
+template <CacheEntryRole R>
+Status CacheReservationManager::MakeCacheReservation(
+    std::size_t incremental_memory_used,
+    std::unique_ptr<CacheReservationHandle<R>>* handle) {
+  assert(handle != nullptr);
+  Status s =
+      UpdateCacheReservation<R>(GetTotalMemoryUsed() + incremental_memory_used);
+  (*handle).reset(new CacheReservationHandle<R>(incremental_memory_used,
+                                                shared_from_this()));
+  return s;
+}
+
+template Status
+CacheReservationManager::MakeCacheReservation<CacheEntryRole::kMisc>(
+    std::size_t incremental_memory_used,
+    std::unique_ptr<CacheReservationHandle<CacheEntryRole::kMisc>>* handle);
+template Status CacheReservationManager::MakeCacheReservation<
+    CacheEntryRole::kFilterConstruction>(
+    std::size_t incremental_memory_used,
+    std::unique_ptr<
+        CacheReservationHandle<CacheEntryRole::kFilterConstruction>>* handle);
+
+template <CacheEntryRole R>
+Status CacheReservationManager::IncreaseCacheReservation(
+    std::size_t new_mem_used) {
+  Status return_status = Status::OK();
+  while (new_mem_used > cache_allocated_size_.load(std::memory_order_relaxed)) {
+    Cache::Handle* handle = nullptr;
+    return_status = cache_->Insert(GetNextCacheKey(), nullptr, kSizeDummyEntry,
+                                   GetNoopDeleterForRole<R>(), &handle);
+
+    if (return_status != Status::OK()) {
+      return return_status;
+    }
+
+    dummy_handles_.push_back(handle);
+    cache_allocated_size_ += kSizeDummyEntry;
+  }
+  return return_status;
+}
+
+Status CacheReservationManager::DecreaseCacheReservation(
+    std::size_t new_mem_used) {
+  Status return_status = Status::OK();
+
+  // Decrease to the smallest multiple of kSizeDummyEntry that is greater than
+  // or equal to new_mem_used. We do addition instead of new_mem_used <=
+  // cache_allocated_size_.load(std::memory_order_relaxed) - kSizeDummyEntry to
+  // avoid underflow of size_t when cache_allocated_size_ = 0
+  while (new_mem_used + kSizeDummyEntry <=
+         cache_allocated_size_.load(std::memory_order_relaxed)) {
+    assert(!dummy_handles_.empty());
+    auto* handle = dummy_handles_.back();
+    cache_->Release(handle, true);
+    dummy_handles_.pop_back();
+    cache_allocated_size_ -= kSizeDummyEntry;
+  }
+  return return_status;
+}
+
+std::size_t CacheReservationManager::GetTotalReservedCacheSize() {
+  return cache_allocated_size_.load(std::memory_order_relaxed);
+}
+
+std::size_t CacheReservationManager::GetTotalMemoryUsed() {
+  return memory_used_;
+}
+
+Slice CacheReservationManager::GetNextCacheKey() {
+  // Calling this function will have the side-effect of changing the
+  // underlying cache_key_ that is shared among other keys generated from
+  // this function. Therefore please make sure the previous keys are
+  // saved/copied before calling this function.
+  cache_key_ = CacheKey::CreateUniqueForCacheLifetime(cache_.get());
+  return cache_key_.AsSlice();
+}
+
+template <CacheEntryRole R>
+Cache::DeleterFn CacheReservationManager::TEST_GetNoopDeleterForRole() {
+  return GetNoopDeleterForRole<R>();
+}
+
+template Cache::DeleterFn CacheReservationManager::TEST_GetNoopDeleterForRole<
+    CacheEntryRole::kFilterConstruction>();
+
+template <CacheEntryRole R>
+CacheReservationHandle<R>::CacheReservationHandle(
+    std::size_t incremental_memory_used,
+    std::shared_ptr<CacheReservationManager> cache_res_mgr)
+    : incremental_memory_used_(incremental_memory_used) {
+  assert(cache_res_mgr != nullptr);
+  cache_res_mgr_ = cache_res_mgr;
+}
+
+template <CacheEntryRole R>
+CacheReservationHandle<R>::~CacheReservationHandle() {
+  assert(cache_res_mgr_ != nullptr);
+  assert(cache_res_mgr_->GetTotalMemoryUsed() >= incremental_memory_used_);
+
+  Status s = cache_res_mgr_->UpdateCacheReservation<R>(
+      cache_res_mgr_->GetTotalMemoryUsed() - incremental_memory_used_);
+  s.PermitUncheckedError();
+}
+
+// Explicitly instantiate templates for "CacheEntryRole" values we use.
+// This makes it possible to keep the template definitions in the .cc file.
+template class CacheReservationHandle<CacheEntryRole::kMisc>;
+template class CacheReservationHandle<CacheEntryRole::kFilterConstruction>;
+}  // namespace ROCKSDB_NAMESPACE
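The explicit instantiations above rely on a standard C++ technique for keeping template definitions out of the header; a generic miniature of the pattern (an editorial aside with hypothetical names, for illustration only):

    // widget.h: declaration only
    template <int N> int Scaled(int x);
    // widget.cc: definition stays hidden from other translation units
    template <int N> int Scaled(int x) { return N * x; }
    template int Scaled<2>(int x);  // the only instantiation that will link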
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_reservation_manager.h mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_reservation_manager.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_reservation_manager.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_reservation_manager.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,191 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "cache/cache_entry_roles.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "table/block_based/block_based_table_reader.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+template <CacheEntryRole R>
+class CacheReservationHandle;
+
+// CacheReservationManager is for reserving cache space for the memory used
+// through inserting/releasing dummy entries in the cache.
+//
+// This class is NOT thread-safe, except that GetTotalReservedCacheSize()
+// can be called without external synchronization.
+class CacheReservationManager
+    : public std::enable_shared_from_this<CacheReservationManager> {
+ public:
+  // Construct a CacheReservationManager
+  // @param cache The cache where dummy entries are inserted and released for
+  // reserving cache space
+  // @param delayed_decrease If set true, then dummy entries won't be released
+  //                         immediately when memory usage decreases.
+  //                         Instead, they will be released when the memory
+  //                         usage decreases to 3/4 of what we have reserved
+  //                         so far. This is for saving some future dummy
+  //                         entry insertion when memory usage increases are
+  //                         likely to happen in the near future.
+  explicit CacheReservationManager(std::shared_ptr<Cache> cache,
+                                   bool delayed_decrease = false);
+
+  // no copy constructor, copy assignment, move constructor, move assignment
+  CacheReservationManager(const CacheReservationManager &) = delete;
+  CacheReservationManager &operator=(const CacheReservationManager &) = delete;
+  CacheReservationManager(CacheReservationManager &&) = delete;
+  CacheReservationManager &operator=(CacheReservationManager &&) = delete;
+
+  ~CacheReservationManager();
+
+  template <CacheEntryRole R>
+
+  // One of the two ways of reserving/releasing cache,
+  // see CacheReservationManager::MakeCacheReservation() for the other.
+  // Use ONLY one of them to prevent unexpected behavior.
+  //
+  // Insert and release dummy entries in the cache to
+  // match the size of total dummy entries with the least multiple of
+  // kSizeDummyEntry greater than or equal to new_mem_used
+  //
+  // Insert dummy entries if new_memory_used > cache_allocated_size_;
+  //
+  // Release dummy entries if new_memory_used < cache_allocated_size_
+  // (and new_memory_used < cache_allocated_size_ * 3/4
+  // when delayed_decrease is set true);
+  //
+  // Keep dummy entries the same if (1) new_memory_used ==
+  // cache_allocated_size_ or (2) new_memory_used is in the interval of
+  // [cache_allocated_size_ * 3/4, cache_allocated_size) when
+  // delayed_decrease is set true.
+  //
+  // @param new_memory_used The number of bytes used by new memory
+  //        The most recent new_memory_used passed in will be returned
+  //        in GetTotalMemoryUsed() even when the call returns a non-ok status.
+  //
+  //        Since the class is NOT thread-safe, external synchronization on
+  //        the order of calling UpdateCacheReservation() is needed if you
+  //        want GetTotalMemoryUsed() to return the latest memory used.
+  //
+  // @return On inserting dummy entries, it returns Status::OK() if all dummy
+  //         entry insertions succeed.
+  //         Otherwise, it returns the first non-ok status;
+  //         On releasing dummy entries, it always returns Status::OK().
+  //         On keeping dummy entries the same, it always returns Status::OK().
+  Status UpdateCacheReservation(std::size_t new_memory_used);
+
+  // One of the two ways of reserving/releasing cache,
+  // see CacheReservationManager::UpdateCacheReservation() for the other.
+  // Use ONLY one of them to prevent unexpected behavior.
+  //
+  // Insert dummy entries in the cache for the incremental memory usage
+  // to match the size of total dummy entries with the least multiple of
+  // kSizeDummyEntry greater than or equal to the total memory used.
+  //
+  // A CacheReservationHandle is returned as an output parameter.
+  // The reserved dummy entries are automatically released on the destruction
+  // of this handle, which achieves better RAII per cache reservation.
+  //
+  // WARNING: Deallocate all the handles of the CacheReservationManager object
+  //          before deallocating the object to prevent unexpected behavior.
+  //
+  // @param incremental_memory_used The number of bytes increased in memory
+  //        usage.
+  //
+  //        Calling GetTotalMemoryUsed() afterward will return the total memory
+  //        increased by this number, even when calling MakeCacheReservation()
+  //        returns non-ok status.
+  //
+  //        Since the class is NOT thread-safe, external synchronization in
+  //        calling MakeCacheReservation() is needed if you want
+  //        GetTotalMemoryUsed() to return the latest memory used.
+  //
+  // @param handle A pointer to a std::unique_ptr<CacheReservationHandle<R>>
+  //        that manages the lifetime of the handle and its cache reservation.
+  //
+  // @return It returns Status::OK() if all dummy
+  //         entry insertions succeed.
+  //         Otherwise, it returns the first non-ok status;
+  //
+  // REQUIRES: handle != nullptr
+  // REQUIRES: The CacheReservationManager object is NOT managed by
+  //           std::unique_ptr as CacheReservationHandle needs to
+  //           share ownership of the CacheReservationManager object.
+  template <CacheEntryRole R>
+  Status MakeCacheReservation(
+      std::size_t incremental_memory_used,
+      std::unique_ptr<CacheReservationHandle<R>> *handle);
+
+  // Return the size of the cache (which is a multiple of kSizeDummyEntry)
+  // successfully reserved by calling UpdateCacheReservation().
+  //
+  // When UpdateCacheReservation() returns non-ok status,
+  // calling GetTotalReservedCacheSize() after that might return a slightly
+  // smaller number than the actual reserved cache size, because the returned
+  // number is always a multiple of kSizeDummyEntry and the cache may become
+  // full in the middle of inserting a dummy entry.
+  std::size_t GetTotalReservedCacheSize();
+
+  // Return the latest total memory used indicated by the most recent call of
+  // UpdateCacheReservation(std::size_t new_memory_used);
+  std::size_t GetTotalMemoryUsed();
+
+  static constexpr std::size_t GetDummyEntrySize() { return kSizeDummyEntry; }
+
+  // For testing only - it is to help ensure the NoopDeleterForRole<R>
+  // accessed from CacheReservationManager and the one accessed from the test
+  // are from the same translation units
+  template <CacheEntryRole R>
+  static Cache::DeleterFn TEST_GetNoopDeleterForRole();
+
+ private:
+  static constexpr std::size_t kSizeDummyEntry = 256 * 1024;
+
+  Slice GetNextCacheKey();
+  template <CacheEntryRole R>
+  Status IncreaseCacheReservation(std::size_t new_mem_used);
+  Status DecreaseCacheReservation(std::size_t new_mem_used);
+
+  std::shared_ptr<Cache> cache_;
+  bool delayed_decrease_;
+  std::atomic<std::size_t> cache_allocated_size_;
+  std::size_t memory_used_;
+  std::vector<Cache::Handle *> dummy_handles_;
+  CacheKey cache_key_;
+};
+
+// CacheReservationHandle is for managing the lifetime of a cache reservation
+// This class is NOT thread-safe
+template <CacheEntryRole R>
+class CacheReservationHandle {
+ public:
+  // REQUIRES: cache_res_mgr != nullptr
+  explicit CacheReservationHandle(
+      std::size_t incremental_memory_used,
+      std::shared_ptr<CacheReservationManager> cache_res_mgr);
+
+  ~CacheReservationHandle();
+
+ private:
+  std::size_t incremental_memory_used_;
+  std::shared_ptr<CacheReservationManager> cache_res_mgr_;
+};
+}  // namespace ROCKSDB_NAMESPACE
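A compact sketch of the two reservation styles declared above (an editorial aside, not part of the diff; `cache` is a placeholder std::shared_ptr<Cache>; note the manager must be owned by a shared_ptr because handles share ownership of it):

    // Style 1: absolute accounting; the reservation tracks the total you report.
    auto mgr = std::make_shared<CacheReservationManager>(cache);
    Status s = mgr->UpdateCacheReservation<CacheEntryRole::kMisc>(10 << 20);
    // Style 2: incremental RAII; released when the handle is destroyed.
    std::unique_ptr<CacheReservationHandle<CacheEntryRole::kMisc>> h;
    s = mgr->MakeCacheReservation<CacheEntryRole::kMisc>(1 << 20, &h);
    h.reset();  // releases the 1 MiB increment; live handles also keep mgr alive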
+#include "cache/cache_reservation_manager.h" + +#include +#include +#include + +#include "cache/cache_entry_roles.h" +#include "rocksdb/cache.h" +#include "rocksdb/slice.h" +#include "table/block_based/block_based_table_reader.h" +#include "test_util/testharness.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { +class CacheReservationManagerTest : public ::testing::Test { + protected: + static constexpr std::size_t kSizeDummyEntry = + CacheReservationManager::GetDummyEntrySize(); + static constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry; + static constexpr int kNumShardBits = 0; // 2^0 shard + static constexpr std::size_t kMetaDataChargeOverhead = 10000; + + std::shared_ptr cache = NewLRUCache(kCacheCapacity, kNumShardBits); + std::unique_ptr test_cache_rev_mng; + + CacheReservationManagerTest() { + test_cache_rev_mng.reset(new CacheReservationManager(cache)); + } +}; + +TEST_F(CacheReservationManagerTest, GenerateCacheKey) { + std::size_t new_mem_used = 1 * kSizeDummyEntry; + Status s = + test_cache_rev_mng + ->UpdateCacheReservation( + new_mem_used); + ASSERT_EQ(s, Status::OK()); + ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry); + ASSERT_LT(cache->GetPinnedUsage(), + 1 * kSizeDummyEntry + kMetaDataChargeOverhead); + + // Next unique Cache key + CacheKey ckey = CacheKey::CreateUniqueForCacheLifetime(cache.get()); + // Back it up to the one used by CRM (using CacheKey implementation details) + using PairU64 = std::pair; + auto& ckey_pair = *reinterpret_cast(&ckey); + ckey_pair.second--; + + // Specific key (subject to implementation details) + EXPECT_EQ(ckey_pair, PairU64(0, 2)); + + Cache::Handle* handle = cache->Lookup(ckey.AsSlice()); + EXPECT_NE(handle, nullptr) + << "Failed to generate the cache key for the dummy entry correctly"; + // Clean up the returned handle from Lookup() to prevent memory leak + cache->Release(handle); +} + +TEST_F(CacheReservationManagerTest, KeepCacheReservationTheSame) { + std::size_t new_mem_used = 1 * kSizeDummyEntry; + Status s = + test_cache_rev_mng + ->UpdateCacheReservation( + new_mem_used); + ASSERT_EQ(s, Status::OK()); + ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), + 1 * kSizeDummyEntry); + ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used); + std::size_t initial_pinned_usage = cache->GetPinnedUsage(); + ASSERT_GE(initial_pinned_usage, 1 * kSizeDummyEntry); + ASSERT_LT(initial_pinned_usage, + 1 * kSizeDummyEntry + kMetaDataChargeOverhead); + + s = test_cache_rev_mng + ->UpdateCacheReservation( + new_mem_used); + EXPECT_EQ(s, Status::OK()) + << "Failed to keep cache reservation the same when new_mem_used equals " + "to current cache reservation"; + EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), + 1 * kSizeDummyEntry) + << "Failed to bookkeep correctly when new_mem_used equals to current " + "cache reservation"; + EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) + << "Failed to bookkeep the used memory correctly when new_mem_used " + "equals to current cache reservation"; + EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage) + << "Failed to keep underlying dummy entries the same when new_mem_used " + "equals to current cache reservation"; +} + +TEST_F(CacheReservationManagerTest, + IncreaseCacheReservationByMultiplesOfDummyEntrySize) { + std::size_t new_mem_used = 2 * kSizeDummyEntry; + Status s = + test_cache_rev_mng + ->UpdateCacheReservation( + new_mem_used); + EXPECT_EQ(s, Status::OK()) + << "Failed to increase cache reservation correctly"; + 
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            2 * kSizeDummyEntry)
+      << "Failed to bookkeep cache reservation increase correctly";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
+      << "Failed to increase underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(),
+            2 * kSizeDummyEntry + kMetaDataChargeOverhead)
+      << "Failed to increase underlying dummy entries in cache correctly";
+}
+
+TEST_F(CacheReservationManagerTest,
+       IncreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
+  std::size_t new_mem_used = 2 * kSizeDummyEntry + kSizeDummyEntry / 2;
+  Status s =
+      test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::OK())
+      << "Failed to increase cache reservation correctly";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            3 * kSizeDummyEntry)
+      << "Failed to bookkeep cache reservation increase correctly";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), 3 * kSizeDummyEntry)
+      << "Failed to increase underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(),
+            3 * kSizeDummyEntry + kMetaDataChargeOverhead)
+      << "Failed to increase underlying dummy entries in cache correctly";
+}
+
+TEST(CacheReservationManagerIncreaseReservationOnFullCacheTest,
+     IncreaseCacheReservationOnFullCache) {
+  constexpr std::size_t kSizeDummyEntry =
+      CacheReservationManager::GetDummyEntrySize();
+  constexpr std::size_t kSmallCacheCapacity = 4 * kSizeDummyEntry;
+  constexpr std::size_t kBigCacheCapacity = 4096 * kSizeDummyEntry;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions lo;
+  lo.capacity = kSmallCacheCapacity;
+  lo.num_shard_bits = 0;  // 2^0 shard
+  lo.strict_capacity_limit = true;
+  std::shared_ptr<Cache> cache = NewLRUCache(lo);
+  std::unique_ptr<CacheReservationManager> test_cache_rev_mng(
+      new CacheReservationManager(cache));
+
+  std::size_t new_mem_used = kSmallCacheCapacity + 1;
+  Status s =
+      test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::Incomplete())
+      << "Failed to return status to indicate failure of dummy entry insertion "
+         "during cache reservation on full cache";
+  EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            1 * kSizeDummyEntry)
+      << "Failed to bookkeep correctly before cache reservation failure "
+         "happens due to full cache";
+  EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            kSmallCacheCapacity)
+      << "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
+         "entry insertions) when encountering cache reservation failure due "
+         "to full cache";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
+      << "Failed to insert underlying dummy entries correctly when "
+         "encountering cache reservation failure due to full cache";
+  EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
+      << "Failed to insert underlying dummy entries correctly when "
+         "encountering cache reservation failure due to full cache";
+
+  new_mem_used = kSmallCacheCapacity / 2;  // 2 dummy entries
+  s = test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::OK())
+      << "Failed to decrease cache reservation after encountering cache "
"reservation failure due to full cache"; + EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), + 2 * kSizeDummyEntry) + << "Failed to bookkeep cache reservation decrease correctly after " + "encountering cache reservation due to full cache"; + EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) + << "Failed to bookkeep the used memory correctly"; + EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry) + << "Failed to release underlying dummy entries correctly on cache " + "reservation decrease after encountering cache resevation failure due " + "to full cache"; + EXPECT_LT(cache->GetPinnedUsage(), + 2 * kSizeDummyEntry + kMetaDataChargeOverhead) + << "Failed to release underlying dummy entries correctly on cache " + "reservation decrease after encountering cache resevation failure due " + "to full cache"; + + // Create cache full again for subsequent tests + new_mem_used = kSmallCacheCapacity + 1; + s = test_cache_rev_mng + ->UpdateCacheReservation( + new_mem_used); + EXPECT_EQ(s, Status::Incomplete()) + << "Failed to return status to indicate failure of dummy entry insertion " + "during cache reservation on full cache"; + EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(), + 1 * kSizeDummyEntry) + << "Failed to bookkeep correctly before cache resevation failure happens " + "due to full cache"; + EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(), + kSmallCacheCapacity) + << "Failed to bookkeep correctly (i.e, bookkeep only successful dummy " + "entry insertions) when encountering cache resevation failure due to " + "full cache"; + EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) + << "Failed to bookkeep the used memory correctly"; + EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry) + << "Failed to insert underlying dummy entries correctly when " + "encountering cache resevation failure due to full cache"; + EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity) + << "Failed to insert underlying dummy entries correctly when " + "encountering cache resevation failure due to full cache"; + + // Increase cache capacity so the previously failed insertion can fully + // succeed + cache->SetCapacity(kBigCacheCapacity); + new_mem_used = kSmallCacheCapacity + 1; + s = test_cache_rev_mng + ->UpdateCacheReservation( + new_mem_used); + EXPECT_EQ(s, Status::OK()) + << "Failed to increase cache reservation after increasing cache capacity " + "and mitigating cache full error"; + EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), + 5 * kSizeDummyEntry) + << "Failed to bookkeep cache reservation increase correctly after " + "increasing cache capacity and mitigating cache full error"; + EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used) + << "Failed to bookkeep the used memory correctly"; + EXPECT_GE(cache->GetPinnedUsage(), 5 * kSizeDummyEntry) + << "Failed to insert underlying dummy entries correctly after increasing " + "cache capacity and mitigating cache full error"; + EXPECT_LT(cache->GetPinnedUsage(), + 5 * kSizeDummyEntry + kMetaDataChargeOverhead) + << "Failed to insert underlying dummy entries correctly after increasing " + "cache capacity and mitigating cache full error"; +} + +TEST_F(CacheReservationManagerTest, + DecreaseCacheReservationByMultiplesOfDummyEntrySize) { + std::size_t new_mem_used = 2 * kSizeDummyEntry; + Status s = + test_cache_rev_mng + ->UpdateCacheReservation( + new_mem_used); + ASSERT_EQ(s, Status::OK()); + ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), + 2 * kSizeDummyEntry); + 
+  ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
+  ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
+  ASSERT_LT(cache->GetPinnedUsage(),
+            2 * kSizeDummyEntry + kMetaDataChargeOverhead);
+
+  new_mem_used = 1 * kSizeDummyEntry;
+  s = test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::OK())
+      << "Failed to decrease cache reservation correctly";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            1 * kSizeDummyEntry)
+      << "Failed to bookkeep cache reservation decrease correctly";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(),
+            1 * kSizeDummyEntry + kMetaDataChargeOverhead)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+}
+
+TEST_F(CacheReservationManagerTest,
+       DecreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
+  std::size_t new_mem_used = 2 * kSizeDummyEntry;
+  Status s =
+      test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  ASSERT_EQ(s, Status::OK());
+  ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            2 * kSizeDummyEntry);
+  ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
+  ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
+  ASSERT_LT(cache->GetPinnedUsage(),
+            2 * kSizeDummyEntry + kMetaDataChargeOverhead);
+
+  new_mem_used = kSizeDummyEntry / 2;
+  s = test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::OK())
+      << "Failed to decrease cache reservation correctly";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            1 * kSizeDummyEntry)
+      << "Failed to bookkeep cache reservation decrease correctly";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+  EXPECT_LT(cache->GetPinnedUsage(),
+            1 * kSizeDummyEntry + kMetaDataChargeOverhead)
+      << "Failed to decrease underlying dummy entries in cache correctly";
+}
+
+TEST(CacheReservationManagerWithDelayedDecreaseTest,
+     DecreaseCacheReservationWithDelayedDecrease) {
+  constexpr std::size_t kSizeDummyEntry =
+      CacheReservationManager::GetDummyEntrySize();
+  constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions lo;
+  lo.capacity = kCacheCapacity;
+  lo.num_shard_bits = 0;
+  std::shared_ptr<Cache> cache = NewLRUCache(lo);
+  std::unique_ptr<CacheReservationManager> test_cache_rev_mng(
+      new CacheReservationManager(cache, true /* delayed_decrease */));
+
+  std::size_t new_mem_used = 8 * kSizeDummyEntry;
+  Status s =
+      test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  ASSERT_EQ(s, Status::OK());
+  ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            8 * kSizeDummyEntry);
+  ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
+  std::size_t initial_pinned_usage = cache->GetPinnedUsage();
+  ASSERT_GE(initial_pinned_usage, 8 * kSizeDummyEntry);
+  ASSERT_LT(initial_pinned_usage,
+            8 * kSizeDummyEntry + kMetaDataChargeOverhead);
+
+  new_mem_used = 6 * kSizeDummyEntry;
+  s = test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            8 * kSizeDummyEntry)
+      << "Failed to bookkeep correctly when delaying cache reservation "
+         "decrease";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
+      << "Failed to delay decreasing underlying dummy entries in cache";
+
+  new_mem_used = 7 * kSizeDummyEntry;
+  s = test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            8 * kSizeDummyEntry)
+      << "Failed to bookkeep correctly when delaying cache reservation "
+         "decrease";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
+      << "Failed to delay decreasing underlying dummy entries in cache";
+
+  new_mem_used = 6 * kSizeDummyEntry - 1;
+  s = test_cache_rev_mng
+          ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+              new_mem_used);
+  EXPECT_EQ(s, Status::OK())
+      << "Failed to decrease cache reservation correctly when new_mem_used < "
+         "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
+            6 * kSizeDummyEntry)
+      << "Failed to bookkeep correctly when new_mem_used < "
+         "GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
+      << "Failed to bookkeep the used memory correctly";
+  EXPECT_GE(cache->GetPinnedUsage(), 6 * kSizeDummyEntry)
+      << "Failed to decrease underlying dummy entries in cache when "
+         "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
+         "decrease mode";
+  EXPECT_LT(cache->GetPinnedUsage(),
+            6 * kSizeDummyEntry + kMetaDataChargeOverhead)
+      << "Failed to decrease underlying dummy entries in cache when "
+         "new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
+         "decrease mode";
+}
+
+TEST(CacheReservationManagerDestructorTest,
+     ReleaseRemainingDummyEntriesOnDestruction) {
+  constexpr std::size_t kSizeDummyEntry =
+      CacheReservationManager::GetDummyEntrySize();
+  constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions lo;
+  lo.capacity = kCacheCapacity;
+  lo.num_shard_bits = 0;
+  std::shared_ptr<Cache> cache = NewLRUCache(lo);
+  {
+    std::unique_ptr<CacheReservationManager> test_cache_rev_mng(
+        new CacheReservationManager(cache));
+    std::size_t new_mem_used = 1 * kSizeDummyEntry;
+    Status s =
+        test_cache_rev_mng
+            ->UpdateCacheReservation<ROCKSDB_NAMESPACE::CacheEntryRole::kMisc>(
+                new_mem_used);
+    ASSERT_EQ(s, Status::OK());
+    ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
+    ASSERT_LT(cache->GetPinnedUsage(),
+              1 * kSizeDummyEntry + kMetaDataChargeOverhead);
+  }
+  EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry)
+      << "Failed to release remaining underlying dummy entries in cache in "
+         "CacheReservationManager's destructor";
+}
+
+TEST(CacheReservationHandleTest, HandleTest) {
+  constexpr std::size_t kOneGigabyte = 1024 * 1024 * 1024;
+  constexpr std::size_t kSizeDummyEntry = 256 * 1024;
+  constexpr std::size_t kMetaDataChargeOverhead = 10000;
+
+  LRUCacheOptions lo;
+  lo.capacity = kOneGigabyte;
+  lo.num_shard_bits = 0;
+  std::shared_ptr<Cache> cache = NewLRUCache(lo);
+
+  std::shared_ptr<CacheReservationManager> test_cache_rev_mng(
+      std::make_shared<CacheReservationManager>(cache));
+
+  std::size_t mem_used = 0;
+  const std::size_t incremental_mem_used_handle_1 = 1 * kSizeDummyEntry;
+  const std::size_t incremental_mem_used_handle_2 = 2 * kSizeDummyEntry;
+  std::unique_ptr<CacheReservationHandle<CacheEntryRole::kMisc>> handle_1,
+      handle_2;
+
+  // To test consecutive CacheReservationManager::MakeCacheReservation works
+  // correctly in terms of returning the handle as well as updating cache
+  // reservation and the latest total memory used
+  Status s = test_cache_rev_mng->MakeCacheReservation<CacheEntryRole::kMisc>(
+      incremental_mem_used_handle_1, &handle_1);
+  mem_used = mem_used + incremental_mem_used_handle_1;
+  ASSERT_EQ(s, Status::OK());
+  EXPECT_TRUE(handle_1 != nullptr);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  s = test_cache_rev_mng->MakeCacheReservation<CacheEntryRole::kMisc>(
+      incremental_mem_used_handle_2, &handle_2);
+  mem_used = mem_used + incremental_mem_used_handle_2;
+  ASSERT_EQ(s, Status::OK());
+  EXPECT_TRUE(handle_2 != nullptr);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  // To test CacheReservationHandle::~CacheReservationHandle() works correctly
+  // in releasing the cache reserved for the handle
+  handle_1.reset();
+  EXPECT_TRUE(handle_1 == nullptr);
+  mem_used = mem_used - incremental_mem_used_handle_1;
+  EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
+  EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  // To test the actual CacheReservationManager object won't be deallocated
+  // as long as there remain handles pointing to it.
+  // We strongly recommend deallocating the CacheReservationManager object
+  // only after all its handles are deallocated, to keep things easy to
+  // reason about
+  test_cache_rev_mng.reset();
+  EXPECT_GE(cache->GetPinnedUsage(), mem_used);
+  EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
+
+  handle_2.reset();
+  // The CacheReservationManager object is now deallocated since all the
+  // handles and its original pointer are gone
+  mem_used = mem_used - incremental_mem_used_handle_2;
+  EXPECT_EQ(mem_used, 0);
+  EXPECT_EQ(cache->GetPinnedUsage(), mem_used);
+}
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/cache_test.cc 2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/cache_test.cc 2025-05-19 16:14:27.000000000 +0000
@@ -117,8 +117,8 @@
 
   void Insert(std::shared_ptr<Cache> cache, int key, int value,
               int charge = 1) {
-    cache->Insert(EncodeKey(key), EncodeValue(value), charge,
-                  &CacheTest::Deleter);
+    EXPECT_OK(cache->Insert(EncodeKey(key), EncodeValue(value), charge,
+                            &CacheTest::Deleter));
   }
 
   void Erase(std::shared_ptr<Cache> cache, int key) {
@@ -167,9 +167,10 @@
   for (int i = 1; i < 100; ++i) {
     std::string key(i, 'a');
     auto kv_size = key.size() + 5;
-    cache->Insert(key, reinterpret_cast<void*>(value), kv_size, dumbDeleter);
-    precise_cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
-                          dumbDeleter);
+    ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
+                            dumbDeleter));
+    ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
+                                    kv_size, dumbDeleter));
     usage += kv_size;
     ASSERT_EQ(usage, cache->GetUsage());
     ASSERT_LT(usage, precise_cache->GetUsage());
@@ -183,10 +184,10 @@
   // make sure the cache will be overloaded
   for (uint64_t i = 1; i < kCapacity; ++i) {
     auto key = ToString(i);
-    cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
-                  dumbDeleter);
-    precise_cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
-                          dumbDeleter);
+    ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
+                            dumbDeleter));
+    ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
+                                    key.size() + 5, dumbDeleter));
   }
 
   // the usage should be close to the capacity
@@ -215,11 +216,12 @@
     auto kv_size = key.size() + 5;
     Cache::Handle* handle;
     Cache::Handle* handle_in_precise_cache;
-    cache->Insert(key, reinterpret_cast<void*>(value), kv_size, dumbDeleter,
-                  &handle);
+    ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
+                            dumbDeleter, &handle));
     assert(handle);
-    precise_cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
-                          dumbDeleter, &handle_in_precise_cache);
+    ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
+                                    kv_size, dumbDeleter,
+                                    &handle_in_precise_cache));
     assert(handle_in_precise_cache);
     pinned_usage += kv_size;
     ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
@@ -254,10 +256,10 @@
   // check that overloading the cache does not change the pinned usage
   for (uint64_t i = 1; i < 2 * kCapacity; ++i) {
     auto key = ToString(i);
-    cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
-                  dumbDeleter);
-    precise_cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
-                          dumbDeleter);
+    ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
+                            dumbDeleter));
+    ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
+                                    key.size() + 5, dumbDeleter));
   }
   ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
   ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
@@ -607,6 +609,9 @@
   for (size_t i = 5; i < 10; i++) {
     cache->Release(handles[i]);
   }
+
+  // Make sure this doesn't crash or upset ASAN/valgrind
+  cache->DisownData();
 }
 
 TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
@@ -710,25 +715,98 @@
 }
 
 namespace {
-std::vector<std::pair<int, int>> callback_state;
-void callback(void* entry, size_t charge) {
-  callback_state.push_back({DecodeValue(entry), static_cast<int>(charge)});
+std::vector<std::pair<int, int>> legacy_callback_state;
+void legacy_callback(void* value, size_t charge) {
+  legacy_callback_state.push_back(
+      {DecodeValue(value), static_cast<int>(charge)});
 }
 };
 
-TEST_P(CacheTest, ApplyToAllCacheEntiresTest) {
+TEST_P(CacheTest, ApplyToAllCacheEntriesTest) {
   std::vector<std::pair<int, int>> inserted;
-  callback_state.clear();
+  legacy_callback_state.clear();
 
   for (int i = 0; i < 10; ++i) {
     Insert(i, i * 2, i + 1);
     inserted.push_back({i * 2, i + 1});
   }
-  cache_->ApplyToAllCacheEntries(callback, true);
+  cache_->ApplyToAllCacheEntries(legacy_callback, true);
+
+  std::sort(inserted.begin(), inserted.end());
+  std::sort(legacy_callback_state.begin(), legacy_callback_state.end());
+  ASSERT_EQ(inserted.size(), legacy_callback_state.size());
+  for (size_t i = 0; i < inserted.size(); ++i) {
+    EXPECT_EQ(inserted[i], legacy_callback_state[i]);
+  }
+}
+
+TEST_P(CacheTest, ApplyToAllEntriesTest) {
+  std::vector<std::string> callback_state;
+  const auto callback = [&](const Slice& key, void* value, size_t charge,
+                            Cache::DeleterFn deleter) {
+    callback_state.push_back(ToString(DecodeKey(key)) + "," +
+                             ToString(DecodeValue(value)) + "," +
+                             ToString(charge));
+    assert(deleter == &CacheTest::Deleter);
+  };
+
+  std::vector<std::string> inserted;
+  callback_state.clear();
+
+  for (int i = 0; i < 10; ++i) {
+    Insert(i, i * 2, i + 1);
+    inserted.push_back(ToString(i) + "," + ToString(i * 2) + "," +
+                       ToString(i + 1));
+  }
+  cache_->ApplyToAllEntries(callback, /*opts*/ {});
 
   std::sort(inserted.begin(), inserted.end());
   std::sort(callback_state.begin(), callback_state.end());
-  ASSERT_TRUE(inserted == callback_state);
+  ASSERT_EQ(inserted.size(), callback_state.size());
+  for (size_t i = 0; i < inserted.size(); ++i) {
+    EXPECT_EQ(inserted[i], callback_state[i]);
+  }
+}
+
+TEST_P(CacheTest, ApplyToAllEntriesDuringResize) {
+  // This is a mini-stress test of ApplyToAllEntries, to ensure
+  // items in the cache that are neither added nor removed
+  // during ApplyToAllEntries are counted exactly once.
+
+  // Insert some entries that we expect to be seen exactly once
+  // during iteration.
+ constexpr int kSpecialCharge = 2; + constexpr int kNotSpecialCharge = 1; + constexpr int kSpecialCount = 100; + for (int i = 0; i < kSpecialCount; ++i) { + Insert(i, i * 2, kSpecialCharge); + } + + // For callback + int special_count = 0; + const auto callback = [&](const Slice&, void*, size_t charge, + Cache::DeleterFn) { + if (charge == static_cast(kSpecialCharge)) { + ++special_count; + } + }; + + // Start counting + std::thread apply_thread([&]() { + // Use small average_entries_per_lock to make the problem difficult + Cache::ApplyToAllEntriesOptions opts; + opts.average_entries_per_lock = 2; + cache_->ApplyToAllEntries(callback, opts); + }); + + // In parallel, add more entries, enough to cause resize but not enough + // to cause ejections + for (int i = kSpecialCount * 1; i < kSpecialCount * 6; ++i) { + Insert(i, i * 2, kNotSpecialCharge); + } + + apply_thread.join(); + ASSERT_EQ(special_count, kSpecialCount); } TEST_P(CacheTest, DefaultShardBits) { @@ -747,11 +825,12 @@ ASSERT_EQ(6, sc->GetNumShardBits()); } -TEST_P(CacheTest, GetCharge) { +TEST_P(CacheTest, GetChargeAndDeleter) { Insert(1, 2); Cache::Handle* h1 = cache_->Lookup(EncodeKey(1)); ASSERT_EQ(2, DecodeValue(cache_->Value(h1))); ASSERT_EQ(1, cache_->GetCharge(h1)); + ASSERT_EQ(&CacheTest::Deleter, cache_->GetDeleter(h1)); cache_->Release(h1); } diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/clock_cache.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/clock_cache.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/clock_cache.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/clock_cache.cc 2025-05-19 16:14:27.000000000 +0000 @@ -33,11 +33,11 @@ #ifndef ROCKSDB_USE_RTTI #define TBB_USE_EXCEPTIONS 0 #endif -#include "tbb/concurrent_hash_map.h" - #include "cache/sharded_cache.h" +#include "port/lang.h" #include "port/malloc.h" #include "port/port.h" +#include "tbb/concurrent_hash_map.h" #include "util/autovector.h" #include "util/mutexlock.h" @@ -176,13 +176,16 @@ // Cache entry meta data. struct CacheHandle { Slice key; - uint32_t hash; void* value; size_t charge; - void (*deleter)(const Slice&, void* value); + Cache::DeleterFn deleter; + uint32_t hash; + + // Addition to "charge" to get "total charge" under metadata policy. + uint32_t meta_charge; // Flags and counters associated with the cache handle: - // lowest bit: n-cache bit + // lowest bit: in-cache bit // second lowest bit: usage bit // the rest bits: reference count // The handle is unused when flags equals to 0. The thread decreases the count @@ -205,9 +208,8 @@ return *this; } - inline static size_t CalcTotalCharge( - Slice key, size_t charge, - CacheMetadataChargePolicy metadata_charge_policy) { + inline static uint32_t CalcMetadataCharge( + Slice key, CacheMetadataChargePolicy metadata_charge_policy) { size_t meta_charge = 0; if (metadata_charge_policy == kFullChargeCacheMetadata) { meta_charge += sizeof(CacheHandle); @@ -218,32 +220,30 @@ meta_charge += key.size(); #endif } - return charge + meta_charge; + assert(meta_charge <= UINT32_MAX); + return static_cast(meta_charge); } - inline size_t CalcTotalCharge( - CacheMetadataChargePolicy metadata_charge_policy) { - return CalcTotalCharge(key, charge, metadata_charge_policy); - } + inline size_t GetTotalCharge() { return charge + meta_charge; } }; // Key of hash map. We store hash value with the key for convenience. 
-struct CacheKey { +struct ClockCacheKey { Slice key; uint32_t hash_value; - CacheKey() = default; + ClockCacheKey() = default; - CacheKey(const Slice& k, uint32_t h) { + ClockCacheKey(const Slice& k, uint32_t h) { key = k; hash_value = h; } - static bool equal(const CacheKey& a, const CacheKey& b) { + static bool equal(const ClockCacheKey& a, const ClockCacheKey& b) { return a.hash_value == b.hash_value && a.key == b.key; } - static size_t hash(const CacheKey& a) { + static size_t hash(const ClockCacheKey& a) { return static_cast(a.hash_value); } }; @@ -260,7 +260,8 @@ class ClockCacheShard final : public CacheShard { public: // Hash map type. - typedef tbb::concurrent_hash_map HashTable; + using HashTable = + tbb::concurrent_hash_map; ClockCacheShard(); ~ClockCacheShard() override; @@ -271,7 +272,26 @@ Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), Cache::Handle** handle, Cache::Priority priority) override; + Status Insert(const Slice& key, uint32_t hash, void* value, + const Cache::CacheItemHelper* helper, size_t charge, + Cache::Handle** handle, Cache::Priority priority) override { + return Insert(key, hash, value, charge, helper->del_cb, handle, priority); + } Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; + Cache::Handle* Lookup(const Slice& key, uint32_t hash, + const Cache::CacheItemHelper* /*helper*/, + const Cache::CreateCallback& /*create_cb*/, + Cache::Priority /*priority*/, bool /*wait*/, + Statistics* /*stats*/) override { + return Lookup(key, hash); + } + bool Release(Cache::Handle* handle, bool /*useful*/, + bool force_erase) override { + return Release(handle, force_erase); + } + bool IsReady(Cache::Handle* /*handle*/) override { return true; } + void Wait(Cache::Handle* /*handle*/) override {} + + // If the entry is in cache, increase reference count and return true. // Return false otherwise. // @@ -284,8 +304,10 @@ size_t GetUsage() const override; size_t GetPinnedUsage() const override; void EraseUnRefEntries() override; - void ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) override; + void ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) override; private: static const uint32_t kInCacheBit = 1; @@ -341,7 +363,8 @@ CacheHandle* Insert(const Slice& key, uint32_t hash, void* value, size_t change, void (*deleter)(const Slice& key, void* value), - bool hold_reference, CleanupContext* context); + bool hold_reference, CleanupContext* context, + bool* overwritten); // Guards list_, head_, and recycle_. In addition, updating table_ also has // to hold the mutex, to avoid the cache being in inconsistent state.
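The hunk that follows replaces ClockCacheShard::ApplyToAllCacheEntries() with ApplyToSomeEntries(), which visits a bounded segment of the shard per mutex acquisition and reports its progress through the caller-supplied uint32_t state cursor, storing UINT32_MAX once every entry has been visited. A minimal sketch of that calling convention, assuming a hypothetical driver function (the real caller, ShardedCache::ApplyToAllEntries, is not shown in this diff; ScanShard and visit are illustrative names):

// Sketch only: drive one shard's ApplyToSomeEntries() to completion.
// CacheShard, Slice, and Cache::DeleterFn are the RocksDB types used in
// the hunk below; ScanShard and visit are hypothetical.
void ScanShard(CacheShard* shard,
               const std::function<void(const Slice& key, void* value,
                                        size_t charge,
                                        Cache::DeleterFn deleter)>& visit) {
  uint32_t state = 0;  // opaque cursor, owned by the shard implementation
  while (state != UINT32_MAX) {  // UINT32_MAX means "all entries visited"
    // Each call takes the shard mutex once, visits roughly
    // average_entries_per_lock entries, and advances `state`.
    shard->ApplyToSomeEntries(visit, /*average_entries_per_lock=*/256, &state);
  }
}

This bounds how long the shard mutex is held on large tables, at the cost of a scan that is not atomic with respect to concurrent inserts and erases.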
@@ -403,22 +426,46 @@ return pinned_usage_.load(std::memory_order_relaxed); } -void ClockCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) { - if (thread_safe) { - mutex_.Lock(); +void ClockCacheShard::ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) { + assert(average_entries_per_lock > 0); + MutexLock lock(&mutex_); + + // Figure out the range to iterate, update `state` + size_t list_size = list_.size(); + size_t start_idx = *state; + size_t end_idx = start_idx + average_entries_per_lock; + if (start_idx > list_size) { + // Shouldn't reach here, but recoverable + assert(false); + // Mark finished with all + *state = UINT32_MAX; + return; + } + if (end_idx >= list_size || end_idx >= UINT32_MAX) { + // This also includes the hypothetical case of >4 billion + // cache handles. + end_idx = list_size; + // Mark finished with all + *state = UINT32_MAX; + } else { + *state = static_cast(end_idx); } - for (auto& handle : list_) { - // Use relaxed semantics instead of acquire semantics since we are either - // holding mutex, or don't have thread safe requirement. + + // Do the iteration + auto cur = list_.begin() + start_idx; + auto end = list_.begin() + end_idx; + for (; cur != end; ++cur) { + const CacheHandle& handle = *cur; + // Use relaxed semantics instead of acquire semantics since we are + // holding mutex uint32_t flags = handle.flags.load(std::memory_order_relaxed); if (InCache(flags)) { - callback(handle.value, handle.charge); + callback(handle.key, handle.value, handle.charge, handle.deleter); } } - if (thread_safe) { - mutex_.Unlock(); - } } void ClockCacheShard::RecycleHandle(CacheHandle* handle, @@ -427,10 +474,8 @@ assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0); context->to_delete_key.push_back(handle->key.data()); context->to_delete_value.emplace_back(*handle); - size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_); - handle->key.clear(); - handle->value = nullptr; - handle->deleter = nullptr; + size_t total_charge = handle->GetTotalCharge(); + // clearing `handle` fields would go here but not strictly required recycle_.push_back(handle); usage_.fetch_sub(total_charge, std::memory_order_relaxed); } @@ -458,7 +503,7 @@ std::memory_order_relaxed)) { if (CountRefs(flags) == 0) { // No reference count before the operation. - size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_); + size_t total_charge = handle->GetTotalCharge(); pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed); } return true; @@ -472,6 +517,11 @@ if (set_usage) { handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed); } + // If the handle reaches state refs=0 and InCache=true after this + // atomic operation then we cannot access `handle` afterward, because + // it could be evicted before we access the `handle`. + size_t total_charge = handle->GetTotalCharge(); + // Use acquire-release semantics as previous operations on the cache entry // has to be order before reference count is decreased, and potential cleanup // of the entry has to be order after. @@ -479,7 +529,6 @@ assert(CountRefs(flags) > 0); if (CountRefs(flags) == 1) { // this is the last reference. - size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_); pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed); // Cleanup if it is the last reference. 
if (!InCache(flags)) { @@ -511,7 +560,7 @@ if (handle->flags.compare_exchange_strong(flags, 0, std::memory_order_acquire, std::memory_order_relaxed)) { bool erased __attribute__((__unused__)) = - table_.erase(CacheKey(handle->key, handle->hash)); + table_.erase(ClockCacheKey(handle->key, handle->hash)); assert(erased); RecycleHandle(handle, context); return true; @@ -564,9 +613,11 @@ CacheHandle* ClockCacheShard::Insert( const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), bool hold_reference, - CleanupContext* context) { - size_t total_charge = - CacheHandle::CalcTotalCharge(key, charge, metadata_charge_policy_); + CleanupContext* context, bool* overwritten) { + assert(overwritten != nullptr && *overwritten == false); + uint32_t meta_charge = + CacheHandle::CalcMetadataCharge(key, metadata_charge_policy_); + size_t total_charge = charge + meta_charge; MutexLock l(&mutex_); bool success = EvictFromCache(total_charge, context); bool strict = strict_capacity_limit_.load(std::memory_order_relaxed); @@ -592,16 +643,27 @@ handle->hash = hash; handle->value = value; handle->charge = charge; + handle->meta_charge = meta_charge; handle->deleter = deleter; uint32_t flags = hold_reference ? kInCacheBit + kOneRef : kInCacheBit; + + // TODO investigate+fix suspected race condition: + // [thread 1] Lookup starts, up to Ref() + // [thread 2] Erase/evict the entry just looked up + // [thread 1] Ref() the handle, even though it's in the recycle bin + // [thread 2] Insert with recycling that handle + // Here we obliterate the other thread's Ref + // Possible fix: never blindly overwrite the flags, but only make + // relative updates (fetch_add, etc). handle->flags.store(flags, std::memory_order_relaxed); HashTable::accessor accessor; - if (table_.find(accessor, CacheKey(key, hash))) { + if (table_.find(accessor, ClockCacheKey(key, hash))) { + *overwritten = true; CacheHandle* existing_handle = accessor->second; table_.erase(accessor); UnsetInCache(existing_handle, context); } - table_.insert(HashTable::value_type(CacheKey(key, hash), handle)); + table_.insert(HashTable::value_type(ClockCacheKey(key, hash), handle)); if (hold_reference) { pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed); } @@ -619,8 +681,9 @@ char* key_data = new char[key.size()]; memcpy(key_data, key.data(), key.size()); Slice key_copy(key_data, key.size()); + bool overwritten = false; CacheHandle* handle = Insert(key_copy, hash, value, charge, deleter, - out_handle != nullptr, &context); + out_handle != nullptr, &context, &overwritten); Status s; if (out_handle != nullptr) { if (handle == nullptr) { @@ -629,13 +692,17 @@ *out_handle = reinterpret_cast(handle); } } + if (overwritten) { + assert(s.ok()); + s = Status::OkOverwritten(); + } Cleanup(context); return s; } Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) { HashTable::const_accessor accessor; - if (!table_.find(accessor, CacheKey(key, hash))) { + if (!table_.find(accessor, ClockCacheKey(key, hash))) { return nullptr; } CacheHandle* handle = accessor->second; @@ -680,7 +747,7 @@ MutexLock l(&mutex_); HashTable::accessor accessor; bool erased = false; - if (table_.find(accessor, CacheKey(key, hash))) { + if (table_.find(accessor, ClockCacheKey(key, hash))) { CacheHandle* handle = accessor->second; table_.erase(accessor); erased = UnsetInCache(handle, context); @@ -718,11 +785,11 @@ const char* Name() const override { return "ClockCache"; } - CacheShard* GetShard(int shard) override { + 
CacheShard* GetShard(uint32_t shard) override { return reinterpret_cast(&shards_[shard]); } - const CacheShard* GetShard(int shard) const override { + const CacheShard* GetShard(uint32_t shard) const override { return reinterpret_cast(&shards_[shard]); } @@ -738,7 +805,18 @@ return reinterpret_cast(handle)->hash; } - void DisownData() override { shards_ = nullptr; } + DeleterFn GetDeleter(Handle* handle) const override { + return reinterpret_cast(handle)->deleter; + } + + void DisownData() override { + // Leak data only if that won't generate an ASAN/valgrind warning + if (!kMustFreeHeapAllocations) { + shards_ = nullptr; + } + } + + void WaitAll(std::vector& /*handles*/) override {} private: ClockCacheShard* shards_; diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/lru_cache.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/lru_cache.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache.cc 2025-05-19 16:14:27.000000000 +0000 @@ -9,26 +9,31 @@ #include "cache/lru_cache.h" -#include -#include -#include -#include - +#include +#include +#include + +#include "monitoring/perf_context_imp.h" +#include "monitoring/statistics.h" +#include "port/lang.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { -LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) { - Resize(); -} +LRUHandleTable::LRUHandleTable(int max_upper_hash_bits) + : length_bits_(/* historical starting size*/ 4), + list_(new LRUHandle* [size_t{1} << length_bits_] {}), + elems_(0), + max_length_bits_(max_upper_hash_bits) {} LRUHandleTable::~LRUHandleTable() { - ApplyToAllCacheEntries([](LRUHandle* h) { - if (!h->HasRefs()) { - h->Free(); - } - }); - delete[] list_; + ApplyToEntriesRange( + [](LRUHandle* h) { + if (!h->HasRefs()) { + h->Free(); + } + }, + 0, uint32_t{1} << length_bits_); } LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) { @@ -42,7 +47,7 @@ *ptr = h; if (old == nullptr) { ++elems_; - if (elems_ > length_) { + if ((elems_ >> length_bits_) > 0) { // elems_ >= length // Since each cache entry is fairly large, we aim for a small // average linked list length (<= 1). Resize(); @@ -62,7 +67,7 @@ } LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) { - LRUHandle** ptr = &list_[hash & (length_ - 1)]; + LRUHandle** ptr = &list_[hash >> (32 - length_bits_)]; while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) { ptr = &(*ptr)->next_hash; } @@ -70,19 +75,29 @@ } void LRUHandleTable::Resize() { - uint32_t new_length = 16; - while (new_length < elems_ * 1.5) { - new_length *= 2; + if (length_bits_ >= max_length_bits_) { + // Due to reaching limit of hash information, if we made the table + // bigger, we would allocate more addresses but only the same + // number would be used. 
+ return; + } + if (length_bits_ >= 31) { + // Avoid undefined behavior shifting uint32_t by 32 + return; } - LRUHandle** new_list = new LRUHandle*[new_length]; - memset(new_list, 0, sizeof(new_list[0]) * new_length); + + uint32_t old_length = uint32_t{1} << length_bits_; + int new_length_bits = length_bits_ + 1; + std::unique_ptr new_list { + new LRUHandle* [size_t{1} << new_length_bits] {} + }; uint32_t count = 0; - for (uint32_t i = 0; i < length_; i++) { + for (uint32_t i = 0; i < old_length; i++) { LRUHandle* h = list_[i]; while (h != nullptr) { LRUHandle* next = h->next_hash; uint32_t hash = h->hash; - LRUHandle** ptr = &new_list[hash & (new_length - 1)]; + LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)]; h->next_hash = *ptr; *ptr = h; h = next; @@ -90,23 +105,25 @@ } } assert(elems_ == count); - delete[] list_; - list_ = new_list; - length_ = new_length; + list_ = std::move(new_list); + length_bits_ = new_length_bits; } -LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit, - double high_pri_pool_ratio, - bool use_adaptive_mutex, - CacheMetadataChargePolicy metadata_charge_policy) +LRUCacheShard::LRUCacheShard( + size_t capacity, bool strict_capacity_limit, double high_pri_pool_ratio, + bool use_adaptive_mutex, CacheMetadataChargePolicy metadata_charge_policy, + int max_upper_hash_bits, + const std::shared_ptr& secondary_cache) : capacity_(0), high_pri_pool_usage_(0), strict_capacity_limit_(strict_capacity_limit), high_pri_pool_ratio_(high_pri_pool_ratio), high_pri_pool_capacity_(0), + table_(max_upper_hash_bits), usage_(0), lru_usage_(0), - mutex_(use_adaptive_mutex) { + mutex_(use_adaptive_mutex), + secondary_cache_(secondary_cache) { set_metadata_charge_policy(metadata_charge_policy); // Make empty circular linked list lru_.next = &lru_; @@ -138,19 +155,40 @@ } } -void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) { - const auto applyCallback = [&]() { - table_.ApplyToAllCacheEntries( - [callback](LRUHandle* h) { callback(h->value, h->charge); }); - }; +void LRUCacheShard::ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) { + // The state is essentially going to be the starting hash, which works + // nicely even if we resize between calls because we use upper-most + // hash bits for table indexes. + MutexLock l(&mutex_); + uint32_t length_bits = table_.GetLengthBits(); + uint32_t length = uint32_t{1} << length_bits; - if (thread_safe) { - MutexLock l(&mutex_); - applyCallback(); + assert(average_entries_per_lock > 0); + // Assuming we are called with same average_entries_per_lock repeatedly, + // this simplifies some logic (index_end will not overflow) + assert(average_entries_per_lock < length || *state == 0); + + uint32_t index_begin = *state >> (32 - length_bits); + uint32_t index_end = index_begin + average_entries_per_lock; + if (index_end >= length) { + // Going to end + index_end = length; + *state = UINT32_MAX; } else { - applyCallback(); + *state = index_end << (32 - length_bits); } + + table_.ApplyToEntriesRange( + [callback](LRUHandle* h) { + DeleterFn deleter = h->IsSecondaryCacheCompatible() + ? 
h->info_.helper->del_cb + : h->info_.deleter; + callback(h->key(), h->value, h->charge, deleter); + }, + index_begin, index_end); } void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) { @@ -257,8 +295,14 @@ EvictFromLRU(0, &last_reference_list); } + // Try to insert the evicted entries into tiered cache // Free the entries outside of mutex for performance reasons for (auto entry : last_reference_list) { + if (secondary_cache_ && entry->IsSecondaryCacheCompatible() && + !entry->IsPromoted()) { + secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper) + .PermitUncheckedError(); + } entry->Free(); } } @@ -268,17 +312,181 @@ strict_capacity_limit_ = strict_capacity_limit; } -Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) { - MutexLock l(&mutex_); - LRUHandle* e = table_.Lookup(key, hash); - if (e != nullptr) { - assert(e->InCache()); - if (!e->HasRefs()) { - // The entry is in LRU since it's in hash and has no external references - LRU_Remove(e); +Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle, + bool free_handle_on_fail) { + Status s = Status::OK(); + autovector last_reference_list; + size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); + + { + MutexLock l(&mutex_); + + // Free the space following strict LRU policy until enough space + // is freed or the lru list is empty + EvictFromLRU(total_charge, &last_reference_list); + + if ((usage_ + total_charge) > capacity_ && + (strict_capacity_limit_ || handle == nullptr)) { + e->SetInCache(false); + if (handle == nullptr) { + // Don't insert the entry but still return ok, as if the entry inserted + // into cache and get evicted immediately. + last_reference_list.push_back(e); + } else { + if (free_handle_on_fail) { + delete[] reinterpret_cast(e); + *handle = nullptr; + } + s = Status::Incomplete("Insert failed due to LRU cache being full."); + } + } else { + // Insert into the cache. Note that the cache might get larger than its + // capacity if not enough space was freed up. 
+ LRUHandle* old = table_.Insert(e); + usage_ += total_charge; + if (old != nullptr) { + s = Status::OkOverwritten(); + assert(old->InCache()); + old->SetInCache(false); + if (!old->HasRefs()) { + // old is on LRU because it's in cache and its reference count is 0 + LRU_Remove(old); + size_t old_total_charge = + old->CalcTotalCharge(metadata_charge_policy_); + assert(usage_ >= old_total_charge); + usage_ -= old_total_charge; + last_reference_list.push_back(old); + } + } + if (handle == nullptr) { + LRU_Insert(e); + } else { + // If caller already holds a ref, no need to take one here + if (!e->HasRefs()) { + e->Ref(); + } + *handle = reinterpret_cast(e); + } + } + } + + // Try to insert the evicted entries into the secondary cache + // Free the entries here outside of mutex for performance reasons + for (auto entry : last_reference_list) { + if (secondary_cache_ && entry->IsSecondaryCacheCompatible() && + !entry->IsPromoted()) { + secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper) + .PermitUncheckedError(); + } + entry->Free(); + } + + return s; +} + +void LRUCacheShard::Promote(LRUHandle* e) { + SecondaryCacheResultHandle* secondary_handle = e->sec_handle; + + assert(secondary_handle->IsReady()); + e->SetIncomplete(false); + e->SetInCache(true); + e->SetPromoted(true); + e->value = secondary_handle->Value(); + e->charge = secondary_handle->Size(); + delete secondary_handle; + + // This call could fail if the cache is over capacity and + // strict_capacity_limit_ is true. In such a case, we don't want + // InsertItem() to free the handle, since the item is already in memory + // and the caller will most likely just read from disk if we erase it here. + if (e->value) { + Cache::Handle* handle = reinterpret_cast(e); + Status s = InsertItem(e, &handle, /*free_handle_on_fail=*/false); + if (!s.ok()) { + // Item is in memory, but not accounted against the cache capacity. + // When the handle is released, the item should get deleted + assert(!e->InCache()); + } + } else { + // Since the secondary cache lookup failed, mark the item as not in cache + // Don't charge the cache, as it's only metadata that'll shortly be released + MutexLock l(&mutex_); + e->charge = 0; + e->SetInCache(false); + } +} + +Cache::Handle* LRUCacheShard::Lookup( + const Slice& key, uint32_t hash, + const ShardedCache::CacheItemHelper* helper, + const ShardedCache::CreateCallback& create_cb, Cache::Priority priority, + bool wait, Statistics* stats) { + LRUHandle* e = nullptr; + { + MutexLock l(&mutex_); + e = table_.Lookup(key, hash); + if (e != nullptr) { + assert(e->InCache()); + if (!e->HasRefs()) { + // The entry is in LRU since it's in hash and has no external references + LRU_Remove(e); + } + e->Ref(); + e->SetHit(); + } + } + + // If handle table lookup failed, then allocate a handle outside the + // mutex if we're going to look up in the secondary cache + // Only support synchronous for now + // TODO: Support asynchronous lookup in secondary cache + if (!e && secondary_cache_ && helper && helper->saveto_cb) { + // For objects from the secondary cache, we expect the caller to provide + // a way to create/delete the primary cache object. The only case where + // a deleter would not be required is for dummy entries inserted for + // accounting purposes, which we won't demote to the secondary cache + // anyway.
+ assert(create_cb && helper->del_cb); + std::unique_ptr secondary_handle = + secondary_cache_->Lookup(key, create_cb, wait); + if (secondary_handle != nullptr) { + e = reinterpret_cast( + new char[sizeof(LRUHandle) - 1 + key.size()]); + + e->flags = 0; + e->SetSecondaryCacheCompatible(true); + e->info_.helper = helper; + e->key_length = key.size(); + e->hash = hash; + e->refs = 0; + e->next = e->prev = nullptr; + e->SetPriority(priority); + memcpy(e->key_data, key.data(), key.size()); + e->value = nullptr; + e->sec_handle = secondary_handle.release(); + e->Ref(); + + if (wait) { + Promote(e); + if (!e->value) { + // The secondary cache returned a handle, but the lookup failed + e->Unref(); + e->Free(); + e = nullptr; + } else { + PERF_COUNTER_ADD(secondary_cache_hit_count, 1); + RecordTick(stats, SECONDARY_CACHE_HITS); + } + } else { + // If wait is false, we always return a handle and let the caller + // release the handle after checking for success or failure + e->SetIncomplete(true); + // This may be slightly inaccurate, if the lookup eventually fails. + // But the probability is very low. + PERF_COUNTER_ADD(secondary_cache_hit_count, 1); + RecordTick(stats, SECONDARY_CACHE_HITS); + } } - e->Ref(); - e->SetHit(); } return reinterpret_cast(e); } @@ -322,7 +530,12 @@ last_reference = false; } } - if (last_reference) { + // If it was the last reference, and the entry is either not secondary + // cache compatible (i.e. a dummy entry for accounting), or is secondary + // cache compatible and has a non-null value, then decrement the cache + // usage. If value is null in the latter case, that means the lookup + // failed and we didn't charge the cache. + if (last_reference && (!e->IsSecondaryCacheCompatible() || e->value)) { size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); assert(usage_ >= total_charge); usage_ -= total_charge; @@ -339,80 +552,35 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value, size_t charge, void (*deleter)(const Slice& key, void* value), + const Cache::CacheItemHelper* helper, Cache::Handle** handle, Cache::Priority priority) { // Allocate the memory here outside of the mutex // If the cache is full, we'll have to release it // It shouldn't happen very often though. LRUHandle* e = reinterpret_cast( new char[sizeof(LRUHandle) - 1 + key.size()]); - Status s = Status::OK(); - autovector last_reference_list; e->value = value; - e->deleter = deleter; + e->flags = 0; + if (helper) { + e->SetSecondaryCacheCompatible(true); + e->info_.helper = helper; + } else { +#ifdef __SANITIZE_THREAD__ + e->is_secondary_cache_compatible_for_tsan = false; +#endif // __SANITIZE_THREAD__ + e->info_.deleter = deleter; + } e->charge = charge; e->key_length = key.size(); - e->flags = 0; e->hash = hash; e->refs = 0; e->next = e->prev = nullptr; e->SetInCache(true); e->SetPriority(priority); memcpy(e->key_data, key.data(), key.size()); - size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_); - - { - MutexLock l(&mutex_); - - // Free the space following strict LRU policy until enough space - // is freed or the lru list is empty - EvictFromLRU(total_charge, &last_reference_list); - - if ((usage_ + total_charge) > capacity_ && - (strict_capacity_limit_ || handle == nullptr)) { - if (handle == nullptr) { - // Don't insert the entry but still return ok, as if the entry inserted - // into cache and get evicted immediately.
- e->SetInCache(false); - last_reference_list.push_back(e); - } else { - delete[] reinterpret_cast(e); - *handle = nullptr; - s = Status::Incomplete("Insert failed due to LRU cache being full."); - } - } else { - // Insert into the cache. Note that the cache might get larger than its - // capacity if not enough space was freed up. - LRUHandle* old = table_.Insert(e); - usage_ += total_charge; - if (old != nullptr) { - assert(old->InCache()); - old->SetInCache(false); - if (!old->HasRefs()) { - // old is on LRU because it's in cache and its reference count is 0 - LRU_Remove(old); - size_t old_total_charge = - old->CalcTotalCharge(metadata_charge_policy_); - assert(usage_ >= old_total_charge); - usage_ -= old_total_charge; - last_reference_list.push_back(old); - } - } - if (handle == nullptr) { - LRU_Insert(e); - } else { - e->Ref(); - *handle = reinterpret_cast(e); - } - } - } - - // Free the entries here outside of mutex for performance reasons - for (auto entry : last_reference_list) { - entry->Free(); - } - return s; + return InsertItem(e, handle, /* free_handle_on_fail */ true); } void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { @@ -442,6 +610,18 @@ } } +bool LRUCacheShard::IsReady(Cache::Handle* handle) { + LRUHandle* e = reinterpret_cast(handle); + MutexLock l(&mutex_); + bool ready = true; + if (e->IsPending()) { + assert(secondary_cache_); + assert(e->sec_handle); + ready = e->sec_handle->IsReady(); + } + return ready; +} + size_t LRUCacheShard::GetUsage() const { MutexLock l(&mutex_); return usage_; @@ -468,7 +648,8 @@ bool strict_capacity_limit, double high_pri_pool_ratio, std::shared_ptr allocator, bool use_adaptive_mutex, - CacheMetadataChargePolicy metadata_charge_policy) + CacheMetadataChargePolicy metadata_charge_policy, + const std::shared_ptr& secondary_cache) : ShardedCache(capacity, num_shard_bits, strict_capacity_limit, std::move(allocator)) { num_shards_ = 1 << num_shard_bits; @@ -476,10 +657,12 @@ port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_)); size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_; for (int i = 0; i < num_shards_; i++) { - new (&shards_[i]) - LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio, - use_adaptive_mutex, metadata_charge_policy); + new (&shards_[i]) LRUCacheShard( + per_shard, strict_capacity_limit, high_pri_pool_ratio, + use_adaptive_mutex, metadata_charge_policy, + /* max_upper_hash_bits */ 32 - num_shard_bits, secondary_cache); } + secondary_cache_ = secondary_cache; } LRUCache::~LRUCache() { @@ -492,11 +675,11 @@ } } -CacheShard* LRUCache::GetShard(int shard) { +CacheShard* LRUCache::GetShard(uint32_t shard) { return reinterpret_cast(&shards_[shard]); } -const CacheShard* LRUCache::GetShard(int shard) const { +const CacheShard* LRUCache::GetShard(uint32_t shard) const { return reinterpret_cast(&shards_[shard]); } @@ -508,23 +691,25 @@ return reinterpret_cast(handle)->charge; } +Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const { + auto h = reinterpret_cast(handle); + if (h->IsSecondaryCacheCompatible()) { + return h->info_.helper->del_cb; + } else { + return h->info_.deleter; + } +} + uint32_t LRUCache::GetHash(Handle* handle) const { return reinterpret_cast(handle)->hash; } void LRUCache::DisownData() { -// Do not drop data if compile with ASAN to suppress leak warning. 
-#if defined(__clang__) -#if !defined(__has_feature) || !__has_feature(address_sanitizer) - shards_ = nullptr; - num_shards_ = 0; -#endif -#else // __clang__ -#ifndef __SANITIZE_ADDRESS__ - shards_ = nullptr; - num_shards_ = 0; -#endif // !__SANITIZE_ADDRESS__ -#endif // __clang__ + // Leak data only if that won't generate an ASAN/valgrind warning + if (!kMustFreeHeapAllocations) { + shards_ = nullptr; + num_shards_ = 0; + } } size_t LRUCache::TEST_GetLRUSize() { @@ -543,19 +728,42 @@ return result; } -std::shared_ptr NewLRUCache(const LRUCacheOptions& cache_opts) { - return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits, - cache_opts.strict_capacity_limit, - cache_opts.high_pri_pool_ratio, - cache_opts.memory_allocator, cache_opts.use_adaptive_mutex, - cache_opts.metadata_charge_policy); +void LRUCache::WaitAll(std::vector& handles) { + if (secondary_cache_) { + std::vector sec_handles; + sec_handles.reserve(handles.size()); + for (Handle* handle : handles) { + if (!handle) { + continue; + } + LRUHandle* lru_handle = reinterpret_cast(handle); + if (!lru_handle->IsPending()) { + continue; + } + sec_handles.emplace_back(lru_handle->sec_handle); + } + secondary_cache_->WaitAll(sec_handles); + for (Handle* handle : handles) { + if (!handle) { + continue; + } + LRUHandle* lru_handle = reinterpret_cast(handle); + if (!lru_handle->IsPending()) { + continue; + } + uint32_t hash = GetHash(handle); + LRUCacheShard* shard = static_cast(GetShard(Shard(hash))); + shard->Promote(lru_handle); + } + } } std::shared_ptr NewLRUCache( size_t capacity, int num_shard_bits, bool strict_capacity_limit, double high_pri_pool_ratio, std::shared_ptr memory_allocator, bool use_adaptive_mutex, - CacheMetadataChargePolicy metadata_charge_policy) { + CacheMetadataChargePolicy metadata_charge_policy, + const std::shared_ptr& secondary_cache) { if (num_shard_bits >= 20) { return nullptr; // the cache cannot be sharded into too many fine pieces } @@ -568,7 +776,25 @@ } return std::make_shared( capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio, - std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy); + std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy, + secondary_cache); +} + +std::shared_ptr NewLRUCache(const LRUCacheOptions& cache_opts) { + return NewLRUCache( + cache_opts.capacity, cache_opts.num_shard_bits, + cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio, + cache_opts.memory_allocator, cache_opts.use_adaptive_mutex, + cache_opts.metadata_charge_policy, cache_opts.secondary_cache); } +std::shared_ptr NewLRUCache( + size_t capacity, int num_shard_bits, bool strict_capacity_limit, + double high_pri_pool_ratio, + std::shared_ptr memory_allocator, bool use_adaptive_mutex, + CacheMetadataChargePolicy metadata_charge_policy) { + return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit, + high_pri_pool_ratio, memory_allocator, use_adaptive_mutex, + metadata_charge_policy, nullptr); +} } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/lru_cache.h mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/lru_cache.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache.h 2025-05-19 16:14:27.000000000 +0000 @@ -1,4 +1,4 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// Copyright (c) 2011-present, Facebook, Inc. 
All rights reserved // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). @@ -8,12 +8,14 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once +#include #include #include "cache/sharded_cache.h" - +#include "port/lang.h" #include "port/malloc.h" #include "port/port.h" +#include "rocksdb/secondary_cache.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { @@ -49,8 +51,18 @@ struct LRUHandle { void* value; - void (*deleter)(const Slice&, void* value); - LRUHandle* next_hash; + union Info { + Info() {} + ~Info() {} + Cache::DeleterFn deleter; + const ShardedCache::CacheItemHelper* helper; + } info_; + // An entry is not added to the LRUHandleTable until the secondary cache + // lookup is complete, so it's safe to have this union. + union { + LRUHandle* next_hash; + SecondaryCacheResultHandle* sec_handle; + }; LRUHandle* next; LRUHandle* prev; size_t charge; // TODO(opt): Only allow uint32_t? @@ -67,12 +79,26 @@ IS_HIGH_PRI = (1 << 1), // Whether this entry is in high-pri pool. IN_HIGH_PRI_POOL = (1 << 2), - // Wwhether this entry has had any lookups (hits). + // Whether this entry has had any lookups (hits). HAS_HIT = (1 << 3), + // Can this be inserted into the secondary cache + IS_SECONDARY_CACHE_COMPATIBLE = (1 << 4), + // Is the handle still being read from a lower tier + IS_PENDING = (1 << 5), + // Has the item been promoted from a lower tier + IS_PROMOTED = (1 << 6), }; uint8_t flags; +#ifdef __SANITIZE_THREAD__ + // TSAN can report a false data race on flags, where one thread is writing + // to one of the mutable bits and another thread is reading this immutable + // bit. To precisely suppress that TSAN warning, we separate out this bit + // during TSAN runs. + bool is_secondary_cache_compatible_for_tsan; +#endif // __SANITIZE_THREAD__ + // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
char key_data[1]; @@ -95,6 +121,15 @@ bool IsHighPri() const { return flags & IS_HIGH_PRI; } bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; } bool HasHit() const { return flags & HAS_HIT; } + bool IsSecondaryCacheCompatible() const { +#ifdef __SANITIZE_THREAD__ + return is_secondary_cache_compatible_for_tsan; +#else + return flags & IS_SECONDARY_CACHE_COMPATIBLE; +#endif // __SANITIZE_THREAD__ + } + bool IsPending() const { return flags & IS_PENDING; } + bool IsPromoted() const { return flags & IS_PROMOTED; } void SetInCache(bool in_cache) { if (in_cache) { @@ -122,15 +157,58 @@ void SetHit() { flags |= HAS_HIT; } + void SetSecondaryCacheCompatible(bool compat) { + if (compat) { + flags |= IS_SECONDARY_CACHE_COMPATIBLE; + } else { + flags &= ~IS_SECONDARY_CACHE_COMPATIBLE; + } +#ifdef __SANITIZE_THREAD__ + is_secondary_cache_compatible_for_tsan = compat; +#endif // __SANITIZE_THREAD__ + } + + void SetIncomplete(bool incomp) { + if (incomp) { + flags |= IS_PENDING; + } else { + flags &= ~IS_PENDING; + } + } + + void SetPromoted(bool promoted) { + if (promoted) { + flags |= IS_PROMOTED; + } else { + flags &= ~IS_PROMOTED; + } + } + void Free() { assert(refs == 0); - if (deleter) { - (*deleter)(key(), value); +#ifdef __SANITIZE_THREAD__ + // Here we can safely assert they are the same without a data race reported + assert(((flags & IS_SECONDARY_CACHE_COMPATIBLE) != 0) == + is_secondary_cache_compatible_for_tsan); +#endif // __SANITIZE_THREAD__ + if (!IsSecondaryCacheCompatible() && info_.deleter) { + (*info_.deleter)(key(), value); + } else if (IsSecondaryCacheCompatible()) { + if (IsPending()) { + assert(sec_handle != nullptr); + SecondaryCacheResultHandle* tmp_sec_handle = sec_handle; + tmp_sec_handle->Wait(); + value = tmp_sec_handle->Value(); + delete tmp_sec_handle; + } + if (value) { + (*info_.helper->del_cb)(key(), value); + } } delete[] reinterpret_cast(this); } - // Caclculate the memory usage by metadata + // Calculate the memory usage by metadata inline size_t CalcTotalCharge( CacheMetadataChargePolicy metadata_charge_policy) { size_t meta_charge = 0; @@ -153,7 +231,10 @@ // 4.4.3's builtin hashtable. class LRUHandleTable { public: - LRUHandleTable(); + // If the table uses more hash bits than `max_upper_hash_bits`, + // it will eat into the bits used for sharding, which are constant + // for a given LRUHandleTable. + explicit LRUHandleTable(int max_upper_hash_bits); ~LRUHandleTable(); LRUHandle* Lookup(const Slice& key, uint32_t hash); @@ -161,8 +242,8 @@ LRUHandle* Remove(const Slice& key, uint32_t hash); template - void ApplyToAllCacheEntries(T func) { - for (uint32_t i = 0; i < length_; i++) { + void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) { + for (uint32_t i = index_begin; i < index_end; i++) { LRUHandle* h = list_[i]; while (h != nullptr) { auto n = h->next_hash; @@ -173,6 +254,8 @@ } } + int GetLengthBits() const { return length_bits_; } + private: // Return a pointer to slot that points to a cache entry that // matches key/hash. If there is no such cache entry, return a @@ -181,11 +264,19 @@ void Resize(); + // Number of hash bits (upper because lower bits used for sharding) + // used for table index. Length == 1 << length_bits_ + int length_bits_; + // The table consists of an array of buckets where each bucket is // a linked list of cache entries that hash into the bucket. 
- LRUHandle** list_; - uint32_t length_; + std::unique_ptr list_; + + // Number of elements currently in the table uint32_t elems_; + + // Set from max_upper_hash_bits (see constructor) + const int max_length_bits_; }; // A single shard of sharded cache. @@ -193,7 +284,9 @@ public: LRUCacheShard(size_t capacity, bool strict_capacity_limit, double high_pri_pool_ratio, bool use_adaptive_mutex, - CacheMetadataChargePolicy metadata_charge_policy); + CacheMetadataChargePolicy metadata_charge_policy, + int max_upper_hash_bits, + const std::shared_ptr& secondary_cache); virtual ~LRUCacheShard() override = default; // Separate from constructor so caller can easily make an array of LRUCache @@ -209,11 +302,35 @@ // Like Cache methods, but with an extra "hash" parameter. virtual Status Insert(const Slice& key, uint32_t hash, void* value, - size_t charge, - void (*deleter)(const Slice& key, void* value), + size_t charge, Cache::DeleterFn deleter, Cache::Handle** handle, - Cache::Priority priority) override; - virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; + Cache::Priority priority) override { + return Insert(key, hash, value, charge, deleter, nullptr, handle, priority); + } + virtual Status Insert(const Slice& key, uint32_t hash, void* value, + const Cache::CacheItemHelper* helper, size_t charge, + Cache::Handle** handle, + Cache::Priority priority) override { + assert(helper); + return Insert(key, hash, value, charge, nullptr, helper, handle, priority); + } + // If helper_cb is null, the values of the following arguments don't + // matter + virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash, + const ShardedCache::CacheItemHelper* helper, + const ShardedCache::CreateCallback& create_cb, + ShardedCache::Priority priority, bool wait, + Statistics* stats) override; + virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override { + return Lookup(key, hash, nullptr, nullptr, Cache::Priority::LOW, true, + nullptr); + } + virtual bool Release(Cache::Handle* handle, bool /*useful*/, + bool force_erase) override { + return Release(handle, force_erase); + } + virtual bool IsReady(Cache::Handle* /*handle*/) override; + virtual void Wait(Cache::Handle* /*handle*/) override {} virtual bool Ref(Cache::Handle* handle) override; virtual bool Release(Cache::Handle* handle, bool force_erase = false) override; @@ -226,8 +343,10 @@ virtual size_t GetUsage() const override; virtual size_t GetPinnedUsage() const override; - virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) override; + virtual void ApplyToSomeEntries( + const std::function& callback, + uint32_t average_entries_per_lock, uint32_t* state) override; virtual void EraseUnRefEntries() override; @@ -239,10 +358,27 @@ // not threadsafe size_t TEST_GetLRUSize(); - // Retrives high pri pool ratio + // Retrieves high pri pool ratio double GetHighPriPoolRatio(); private: + friend class LRUCache; + // Insert an item into the hash table and, if handle is null, insert into + // the LRU list. Older items are evicted as necessary. If the cache is full + // and free_handle_on_fail is true, the item is deleted and handle is set to + // nullptr. + Status InsertItem(LRUHandle* item, Cache::Handle** handle, + bool free_handle_on_fail); + Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge, + DeleterFn deleter, const Cache::CacheItemHelper* helper, + Cache::Handle** handle, Cache::Priority priority); + // Promote an item looked up from the secondary cache to the LRU cache.
The + // item is only inserted into the hash table and not the LRU list, and only + // if the cache is not at full capacity, as is the case during Insert. The + // caller should hold a reference on the LRUHandle. When the caller releases + // the last reference, the item is added to the LRU list. + // The item is promoted to the high pri or low pri pool as specified by the + // caller in Lookup. + void Promote(LRUHandle* e); void LRU_Remove(LRUHandle* e); void LRU_Insert(LRUHandle* e); @@ -303,6 +439,8 @@ // We don't count mutex_ as the cache's internal state so semantically we // don't mind mutex_ invoking the non-const actions. mutable port::Mutex mutex_; + + std::shared_ptr secondary_cache_; }; class LRUCache @@ -316,24 +454,28 @@ std::shared_ptr memory_allocator = nullptr, bool use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy metadata_charge_policy = - kDontChargeCacheMetadata); + kDontChargeCacheMetadata, + const std::shared_ptr& secondary_cache = nullptr); virtual ~LRUCache(); virtual const char* Name() const override { return "LRUCache"; } - virtual CacheShard* GetShard(int shard) override; - virtual const CacheShard* GetShard(int shard) const override; + virtual CacheShard* GetShard(uint32_t shard) override; + virtual const CacheShard* GetShard(uint32_t shard) const override; virtual void* Value(Handle* handle) override; virtual size_t GetCharge(Handle* handle) const override; virtual uint32_t GetHash(Handle* handle) const override; + virtual DeleterFn GetDeleter(Handle* handle) const override; virtual void DisownData() override; + virtual void WaitAll(std::vector& handles) override; // Retrieves number of elements in LRU, for unit test purpose only size_t TEST_GetLRUSize(); - // Retrives high pri pool ratio + // Retrieves high pri pool ratio double GetHighPriPoolRatio(); private: LRUCacheShard* shards_ = nullptr; int num_shards_ = 0; + std::shared_ptr secondary_cache_; }; } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/lru_cache_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/lru_cache_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/lru_cache_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -7,8 +7,21 @@ #include #include + +#include "cache/cache_key.h" +#include "db/db_test_util.h" +#include "file/sst_file_manager_impl.h" #include "port/port.h" +#include "port/stack_trace.h" +#include "rocksdb/cache.h" +#include "rocksdb/io_status.h" +#include "rocksdb/sst_file_manager.h" +#include "rocksdb/utilities/cache_dump_load.h" #include "test_util/testharness.h" +#include "util/coding.h" +#include "util/random.h" +#include "utilities/cache_dump_load_impl.h" +#include "utilities/fault_injection_fs.h" namespace ROCKSDB_NAMESPACE { @@ -30,15 +43,17 @@ DeleteCache(); cache_ = reinterpret_cast( port::cacheline_aligned_alloc(sizeof(LRUCacheShard))); - new (cache_) LRUCacheShard(capacity, false /*strict_capcity_limit*/, - high_pri_pool_ratio, use_adaptive_mutex, - kDontChargeCacheMetadata); + new (cache_) LRUCacheShard( + capacity, false /*strict_capcity_limit*/, high_pri_pool_ratio, + use_adaptive_mutex, kDontChargeCacheMetadata, + 24 /*max_upper_hash_bits*/, nullptr /*secondary_cache*/); } void Insert(const std::string& key, Cache::Priority priority = Cache::Priority::LOW) { - cache_->Insert(key, 0 /*hash*/, nullptr /*value*/, 1 /*charge*/, - nullptr /*deleter*/, nullptr /*handle*/, priority); + 
EXPECT_OK(cache_->Insert(key, 0 /*hash*/, nullptr /*value*/, 1 /*charge*/, + nullptr /*deleter*/, nullptr /*handle*/, + priority)); } void Insert(char key, Cache::Priority priority = Cache::Priority::LOW) { @@ -190,6 +205,1641 @@ ValidateLRUList({"e", "f", "g", "Z", "d"}, 2); } +class TestSecondaryCache : public SecondaryCache { + public: + // Specifies what action to take on a lookup for a particular key + enum ResultType { + SUCCESS, + // Fail lookup immediately + FAIL, + // Defer the result. It will be returned after Wait/WaitAll is called + DEFER, + // Defer the result and eventually return failure + DEFER_AND_FAIL + }; + + using ResultMap = std::unordered_map; + + explicit TestSecondaryCache(size_t capacity) + : num_inserts_(0), num_lookups_(0), inject_failure_(false) { + cache_ = NewLRUCache(capacity, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + } + ~TestSecondaryCache() override { cache_.reset(); } + + const char* Name() const override { return "TestSecondaryCache"; } + + void InjectFailure() { inject_failure_ = true; } + + void ResetInjectFailure() { inject_failure_ = false; } + + void SetDbSessionId(const std::string& db_session_id) { + // NOTE: we assume the file is smaller than kMaxFileSizeStandardEncoding + // for this to work, but that's safe in a test. + auto base = OffsetableCacheKey("unknown", db_session_id, 1, 1); + ckey_prefix_ = base.CommonPrefixSlice().ToString(); + } + + Status Insert(const Slice& key, void* value, + const Cache::CacheItemHelper* helper) override { + if (inject_failure_) { + return Status::Corruption("Insertion Data Corrupted"); + } + EXPECT_TRUE(IsDbSessionLowerAsKeyPrefix(key)); + size_t size; + char* buf; + Status s; + + num_inserts_++; + size = (*helper->size_cb)(value); + buf = new char[size + sizeof(uint64_t)]; + EncodeFixed64(buf, size); + s = (*helper->saveto_cb)(value, 0, size, buf + sizeof(uint64_t)); + if (!s.ok()) { + delete[] buf; + return s; + } + return cache_->Insert(key, buf, size, + [](const Slice& /*key*/, void* val) -> void { + delete[] static_cast(val); + }); + } + + std::unique_ptr Lookup( + const Slice& key, const Cache::CreateCallback& create_cb, + bool /*wait*/) override { + std::string key_str = key.ToString(); + TEST_SYNC_POINT_CALLBACK("TestSecondaryCache::Lookup", &key_str); + + std::unique_ptr secondary_handle; + ResultType type = ResultType::SUCCESS; + auto iter = result_map_.find(key.ToString()); + if (iter != result_map_.end()) { + type = iter->second; + } + if (type == ResultType::FAIL) { + return secondary_handle; + } + + Cache::Handle* handle = cache_->Lookup(key); + num_lookups_++; + if (handle) { + void* value = nullptr; + size_t charge = 0; + Status s; + if (type != ResultType::DEFER_AND_FAIL) { + char* ptr = (char*)cache_->Value(handle); + size_t size = DecodeFixed64(ptr); + ptr += sizeof(uint64_t); + s = create_cb(ptr, size, &value, &charge); + } + if (s.ok()) { + secondary_handle.reset(new TestSecondaryCacheResultHandle( + cache_.get(), handle, value, charge, type)); + } else { + cache_->Release(handle); + } + } + return secondary_handle; + } + + void Erase(const Slice& /*key*/) override {} + + void WaitAll(std::vector handles) override { + for (SecondaryCacheResultHandle* handle : handles) { + TestSecondaryCacheResultHandle* sec_handle = + static_cast(handle); + sec_handle->SetReady(); + } + } + + std::string GetPrintableOptions() const override { return ""; } + + void SetResultMap(ResultMap&& map) { result_map_ = std::move(map); } + + uint32_t num_inserts() { return
num_inserts_; } + + uint32_t num_lookups() { return num_lookups_; } + + bool IsDbSessionLowerAsKeyPrefix(const Slice& key) { + return key.starts_with(ckey_prefix_); + } + + private: + class TestSecondaryCacheResultHandle : public SecondaryCacheResultHandle { + public: + TestSecondaryCacheResultHandle(Cache* cache, Cache::Handle* handle, + void* value, size_t size, ResultType type) + : cache_(cache), + handle_(handle), + value_(value), + size_(size), + is_ready_(true) { + if (type != ResultType::SUCCESS) { + is_ready_ = false; + } + } + + ~TestSecondaryCacheResultHandle() override { cache_->Release(handle_); } + + bool IsReady() override { return is_ready_; } + + void Wait() override {} + + void* Value() override { + assert(is_ready_); + return value_; + } + + size_t Size() override { return Value() ? size_ : 0; } + + void SetReady() { is_ready_ = true; } + + private: + Cache* cache_; + Cache::Handle* handle_; + void* value_; + size_t size_; + bool is_ready_; + }; + + std::shared_ptr cache_; + uint32_t num_inserts_; + uint32_t num_lookups_; + bool inject_failure_; + std::string ckey_prefix_; + ResultMap result_map_; +}; + +class DBSecondaryCacheTest : public DBTestBase { + public: + DBSecondaryCacheTest() + : DBTestBase("db_secondary_cache_test", /*env_do_fsync=*/true) { + fault_fs_.reset(new FaultInjectionTestFS(env_->GetFileSystem())); + fault_env_.reset(new CompositeEnvWrapper(env_, fault_fs_)); + } + + std::shared_ptr fault_fs_; + std::unique_ptr fault_env_; +}; + +class LRUSecondaryCacheTest : public LRUCacheTest { + public: + LRUSecondaryCacheTest() : fail_create_(false) {} + ~LRUSecondaryCacheTest() {} + + protected: + class TestItem { + public: + TestItem(const char* buf, size_t size) : buf_(new char[size]), size_(size) { + memcpy(buf_.get(), buf, size); + } + ~TestItem() {} + + char* Buf() { return buf_.get(); } + size_t Size() { return size_; } + std::string ToString() { return std::string(Buf(), Size()); } + + private: + std::unique_ptr buf_; + size_t size_; + }; + + static size_t SizeCallback(void* obj) { + return reinterpret_cast(obj)->Size(); + } + + static Status SaveToCallback(void* from_obj, size_t from_offset, + size_t length, void* out) { + TestItem* item = reinterpret_cast(from_obj); + char* buf = item->Buf(); + EXPECT_EQ(length, item->Size()); + EXPECT_EQ(from_offset, 0); + memcpy(out, buf, length); + return Status::OK(); + } + + static void DeletionCallback(const Slice& /*key*/, void* obj) { + delete reinterpret_cast(obj); + } + + static Cache::CacheItemHelper helper_; + + static Status SaveToCallbackFail(void* /*obj*/, size_t /*offset*/, + size_t /*size*/, void* /*out*/) { + return Status::NotSupported(); + } + + static Cache::CacheItemHelper helper_fail_; + + Cache::CreateCallback test_item_creator = + [&](void* buf, size_t size, void** out_obj, size_t* charge) -> Status { + if (fail_create_) { + return Status::NotSupported(); + } + *out_obj = reinterpret_cast(new TestItem((char*)buf, size)); + *charge = size; + return Status::OK(); + }; + + void SetFailCreate(bool fail) { fail_create_ = fail; } + + private: + bool fail_create_; +}; + +Cache::CacheItemHelper LRUSecondaryCacheTest::helper_( + LRUSecondaryCacheTest::SizeCallback, LRUSecondaryCacheTest::SaveToCallback, + LRUSecondaryCacheTest::DeletionCallback); + +Cache::CacheItemHelper LRUSecondaryCacheTest::helper_fail_( + LRUSecondaryCacheTest::SizeCallback, + LRUSecondaryCacheTest::SaveToCallbackFail, + LRUSecondaryCacheTest::DeletionCallback); + +TEST_F(LRUSecondaryCacheTest, BasicTest) { + LRUCacheOptions 
opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata); + std::shared_ptr secondary_cache = + std::make_shared(2048); + opts.secondary_cache = secondary_cache; + std::shared_ptr cache = NewLRUCache(opts); + std::shared_ptr stats = CreateDBStatistics(); + + Random rnd(301); + std::string str1 = rnd.RandomString(1020); + TestItem* item1 = new TestItem(str1.data(), str1.length()); + ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_, + str1.length())); + std::string str2 = rnd.RandomString(1020); + TestItem* item2 = new TestItem(str2.data(), str2.length()); + // k1 should be demoted to NVM + ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_, + str2.length())); + + get_perf_context()->Reset(); + Cache::Handle* handle; + handle = + cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, test_item_creator, + Cache::Priority::LOW, true, stats.get()); + ASSERT_NE(handle, nullptr); + cache->Release(handle); + // This lookup should promote k1 and demote k2 + handle = + cache->Lookup("k1", &LRUSecondaryCacheTest::helper_, test_item_creator, + Cache::Priority::LOW, true, stats.get()); + ASSERT_NE(handle, nullptr); + cache->Release(handle); + ASSERT_EQ(secondary_cache->num_inserts(), 2u); + ASSERT_EQ(secondary_cache->num_lookups(), 1u); + ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_HITS), + secondary_cache->num_lookups()); + PerfContext perf_ctx = *get_perf_context(); + ASSERT_EQ(perf_ctx.secondary_cache_hit_count, secondary_cache->num_lookups()); + + cache.reset(); + secondary_cache.reset(); +} + +TEST_F(LRUSecondaryCacheTest, BasicFailTest) { + LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata); + std::shared_ptr secondary_cache = + std::make_shared(2048); + opts.secondary_cache = secondary_cache; + std::shared_ptr cache = NewLRUCache(opts); + + Random rnd(301); + std::string str1 = rnd.RandomString(1020); + TestItem* item1 = new TestItem(str1.data(), str1.length()); + ASSERT_NOK(cache->Insert("k1", item1, nullptr, str1.length())); + ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_, + str1.length())); + + Cache::Handle* handle; + handle = cache->Lookup("k2", nullptr, test_item_creator, Cache::Priority::LOW, + true); + ASSERT_EQ(handle, nullptr); + handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW, false); + ASSERT_EQ(handle, nullptr); + + cache.reset(); + secondary_cache.reset(); +} + +TEST_F(LRUSecondaryCacheTest, SaveFailTest) { + LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata); + std::shared_ptr secondary_cache = + std::make_shared(2048); + opts.secondary_cache = secondary_cache; + std::shared_ptr cache = NewLRUCache(opts); + + Random rnd(301); + std::string str1 = rnd.RandomString(1020); + TestItem* item1 = new TestItem(str1.data(), str1.length()); + ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_fail_, + str1.length())); + std::string str2 = rnd.RandomString(1020); + TestItem* item2 = new TestItem(str2.data(), str2.length()); + // k1 should be demoted to NVM + ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_fail_, + str2.length())); + + Cache::Handle* handle; + handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_fail_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_NE(handle, nullptr); + cache->Release(handle); + // This lookup should fail, since k1 demotion would have failed + handle = 
cache->Lookup("k1", &LRUSecondaryCacheTest::helper_fail_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_EQ(handle, nullptr); + // Since k1 didn't get promoted, k2 should still be in cache + handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_fail_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_NE(handle, nullptr); + cache->Release(handle); + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 1u); + + cache.reset(); + secondary_cache.reset(); +} + +TEST_F(LRUSecondaryCacheTest, CreateFailTest) { + LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache = + std::make_shared<TestSecondaryCache>(2048); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + + Random rnd(301); + std::string str1 = rnd.RandomString(1020); + TestItem* item1 = new TestItem(str1.data(), str1.length()); + ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_, + str1.length())); + std::string str2 = rnd.RandomString(1020); + TestItem* item2 = new TestItem(str2.data(), str2.length()); + // k1 should be demoted to NVM + ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_, + str2.length())); + + Cache::Handle* handle; + SetFailCreate(true); + handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_NE(handle, nullptr); + cache->Release(handle); + // This lookup should fail, since k1 creation would have failed + handle = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_EQ(handle, nullptr); + // Since k1 didn't get promoted, k2 should still be in cache + handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_NE(handle, nullptr); + cache->Release(handle); + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 1u); + + cache.reset(); + secondary_cache.reset(); +} + +TEST_F(LRUSecondaryCacheTest, FullCapacityTest) { + LRUCacheOptions opts(1024, 0, /*_strict_capacity_limit=*/true, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache = + std::make_shared<TestSecondaryCache>(2048); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + + Random rnd(301); + std::string str1 = rnd.RandomString(1020); + TestItem* item1 = new TestItem(str1.data(), str1.length()); + ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_, + str1.length())); + std::string str2 = rnd.RandomString(1020); + TestItem* item2 = new TestItem(str2.data(), str2.length()); + // k1 should be demoted to NVM + ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_, + str2.length())); + + Cache::Handle* handle; + handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_NE(handle, nullptr); + // k1 promotion should fail due to the block cache being at capacity, + // but the lookup should still succeed + Cache::Handle* handle2; + handle2 = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW, true); + ASSERT_NE(handle2, nullptr); + // Since k1 didn't get inserted, k2 should still be in cache + cache->Release(handle); + cache->Release(handle2); + handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW,
true); + ASSERT_NE(handle, nullptr); + cache->Release(handle); + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 1u); + + cache.reset(); + secondary_cache.reset(); +} + +// In this test, the block cache size is set to 4096. After inserting 6 KV-pairs +// and flushing, there are 5 blocks in this SST file, 2 data blocks and 3 meta +// blocks. block_1 size is 4096 and block_2 size is 2056. The total size +// of the meta blocks is about 900 to 1000. Therefore, in any situation, +// if we try to insert block_1 into the block cache, it will always fail. Only +// block_2 will be successfully inserted into the block cache. +TEST_F(DBSecondaryCacheTest, TestSecondaryCacheCorrectness1) { + LRUCacheOptions opts(4 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + fault_fs_->SetFailGetUniqueId(true); + + // Set the file paranoid check, so after flush, the file will be read and + // all the blocks will be accessed. + options.paranoid_file_checks = true; + DestroyAndReopen(options); + std::string session_id; + ASSERT_OK(db_->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + Random rnd(301); + const int N = 6; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + // After Flush is successful, RocksDB will do the paranoid check for the new + // SST file. Meta blocks are always cached in the block cache and they + // will not be evicted. When block_2 is a cache miss and is read out, it is + // inserted into the block cache. Note that block_1 is never successfully + // inserted into the block cache. There are 2 lookups in the secondary cache + // for block_1 and block_2. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + Compact("a", "z"); + // Compaction will create the iterator to scan the whole file. So all the + // blocks are needed. Meta blocks are always cached. When block_1 is read + // out, block_2 is evicted from the block cache and inserted into the secondary + // cache. + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 3u); + + std::string v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // The first data block is not in the cache; similarly, this triggers the block + // cache Lookup and a secondary cache lookup for block_1. But block_1 will not + // be inserted successfully due to its size. Currently, the cache only has + // the meta blocks. + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 4u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + // The second data block is not in the cache; similarly, this triggers the block + // cache Lookup and a secondary cache lookup for block_2, and block_2 is found + // in the secondary cache. Now the block cache has block_2. + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 5u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + // block_2 is in the block cache. There is a block cache hit.
No need to + // lookup or insert the secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 5u); + + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // Lookup the first data block: it is not in the block cache, so look up the + // secondary cache. It is not in the secondary cache either. After Get, + // block_1 still will not be cached. + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 6u); + + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // Lookup the first data block: it is not in the block cache, so look up the + // secondary cache. It is not in the secondary cache either. After Get, + // block_1 still will not be cached. + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 7u); + + Destroy(options); +} + +// In this test, the block cache size is set to 6100. After inserting 6 KV-pairs +// and flushing, there are 5 blocks in this SST file, 2 data blocks and 3 meta +// blocks. block_1 size is 4096 and block_2 size is 2056. The total size +// of the meta blocks is about 900 to 1000. Therefore, we can successfully +// insert and cache block_1 in the block cache (this is what differs +// from TestSecondaryCacheCorrectness1). +TEST_F(DBSecondaryCacheTest, TestSecondaryCacheCorrectness2) { + LRUCacheOptions opts(6100, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.paranoid_file_checks = true; + options.env = fault_env_.get(); + fault_fs_->SetFailGetUniqueId(true); + DestroyAndReopen(options); + std::string session_id; + ASSERT_OK(db_->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + Random rnd(301); + const int N = 6; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + // After Flush is successful, RocksDB will do the paranoid check for the new + // SST file. Meta blocks are always cached in the block cache and they + // will not be evicted. When block_2 is a cache miss and is read out, it is + // inserted into the block cache. Therefore, block_1 is evicted from the block + // cache and successfully inserted into the secondary cache. There are 2 + // lookups in the secondary cache for block_1 and block_2. + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + Compact("a", "z"); + // Compaction will create the iterator to scan the whole file. So all the + // blocks are needed. After Flush, only block_2 is cached in the block cache + // and block_1 is in the secondary cache. So when block_1 is read, it is + // read out from the secondary cache and inserted into the block cache. At the same + // time, block_2 is inserted into the secondary cache. Now, the secondary cache has + // both block_1 and block_2. After compaction, block_1 is in the cache.
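The promotion/demotion cycle asserted in these tests is driven by the CacheItemHelper the tests pass as helper_: the primary cache uses its callbacks to size, serialize, and free an entry when moving it to or from the secondary cache. Below is a minimal sketch of such a helper, assuming the 6.x-era callback shapes (SizeCallback, SaveToCallback, DeleterFn) visible elsewhere in this diff; the MyItem type and function names are illustrative only, not part of the quoted sources.

#include <cstring>
#include <string>
#include "rocksdb/cache.h"

// Illustrative value type; the tests use their own TestItem.
struct MyItem {
  std::string buf;
};

// Reports how many bytes SaveTo will produce for this entry.
size_t MyItemSize(void* obj) { return static_cast<MyItem*>(obj)->buf.size(); }

// Copies a slice of the entry into the buffer the cache provides when
// demoting the entry to the secondary cache.
rocksdb::Status MyItemSaveTo(void* from_obj, size_t from_offset, size_t length,
                             void* out) {
  const MyItem* item = static_cast<const MyItem*>(from_obj);
  std::memcpy(out, item->buf.data() + from_offset, length);
  return rocksdb::Status::OK();
}

// Frees the entry when it leaves the cache for good.
void MyItemDeleter(const rocksdb::Slice& /*key*/, void* obj) {
  delete static_cast<MyItem*>(obj);
}

// Bundled the same way the tests pass &helper_ to Insert()/Lookup().
const rocksdb::Cache::CacheItemHelper kMyHelper{MyItemSize, MyItemSaveTo,
                                                MyItemDeleter};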
+ ASSERT_EQ(secondary_cache->num_inserts(), 2u); + ASSERT_EQ(secondary_cache->num_lookups(), 3u); + + std::string v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // This Get needs to access block_1; since block_1 is cached in the block cache, + // there is no secondary cache lookup. + ASSERT_EQ(secondary_cache->num_inserts(), 2u); + ASSERT_EQ(secondary_cache->num_lookups(), 3u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + // This Get needs to access block_2, which is not in the block cache. So + // it will look up the secondary cache for block_2 and cache it in the + // block cache. + ASSERT_EQ(secondary_cache->num_inserts(), 2u); + ASSERT_EQ(secondary_cache->num_lookups(), 4u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + // This Get needs to access block_2, which is already in the block cache. + // No need to look up the secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 2u); + ASSERT_EQ(secondary_cache->num_lookups(), 4u); + + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // This Get needs to access block_1; since block_1 is not in the block cache, + // there is one secondary cache lookup. Then, block_1 is cached in the + // block cache. + ASSERT_EQ(secondary_cache->num_inserts(), 2u); + ASSERT_EQ(secondary_cache->num_lookups(), 5u); + + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // This Get needs to access block_1; since block_1 is cached in the block cache, + // there is no secondary cache lookup. + ASSERT_EQ(secondary_cache->num_inserts(), 2u); + ASSERT_EQ(secondary_cache->num_lookups(), 5u); + + Destroy(options); +} + +// The block cache size is set to 1024*1024. After inserting 6 KV-pairs +// and flushing, there are 5 blocks in this SST file, 2 data blocks and 3 meta +// blocks. block_1 size is 4096 and block_2 size is 2056. The total size +// of the meta blocks is about 900 to 1000. Therefore, we can successfully +// cache all the blocks in the block cache and there is no secondary cache +// insertion. 2 lookups are needed for the blocks. +TEST_F(DBSecondaryCacheTest, NoSecondaryCacheInsertion) { + LRUCacheOptions opts(1024 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.paranoid_file_checks = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + fault_fs_->SetFailGetUniqueId(true); + + DestroyAndReopen(options); + std::string session_id; + ASSERT_OK(db_->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + Random rnd(301); + const int N = 6; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1000); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + // After Flush is successful, RocksDB will do the paranoid check for the new + // SST file. Meta blocks are always cached in the block cache and they + // will not be evicted. Now the block cache is large enough; it caches + // both block_1 and block_2. When block_1 and block_2 are read for the first + // time, there are cache misses.
So 2 secondary cache lookups are needed for + // the 2 blocks. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + Compact("a", "z"); + // Compaction will iterate the whole SST file. Since all the data blocks + // are in the block cache, there is no need to look up the secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + std::string v = Get(Key(0)); + ASSERT_EQ(1000, v.size()); + // Since the block cache is large enough, all the blocks are cached. We + // do not need to look up the secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + Destroy(options); +} + +TEST_F(DBSecondaryCacheTest, SecondaryCacheIntensiveTesting) { + LRUCacheOptions opts(8 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + fault_fs_->SetFailGetUniqueId(true); + DestroyAndReopen(options); + std::string session_id; + ASSERT_OK(db_->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + Random rnd(301); + const int N = 256; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1000); + ASSERT_OK(Put(Key(i), p_v)); + } + ASSERT_OK(Flush()); + Compact("a", "z"); + + Random r_index(47); + std::string v; + for (int i = 0; i < 1000; i++) { + uint32_t key_i = r_index.Next() % N; + v = Get(Key(key_i)); + } + + // We have over 200 data blocks, so there will be multiple insertions + // and lookups. + ASSERT_GE(secondary_cache->num_inserts(), 1u); + ASSERT_GE(secondary_cache->num_lookups(), 1u); + + Destroy(options); +} + +// In this test, the block cache size is set to 4096. After inserting 6 KV-pairs +// and flushing, there are 5 blocks in this SST file, 2 data blocks and 3 meta +// blocks. block_1 size is 4096 and block_2 size is 2056. The total size +// of the meta blocks is about 900 to 1000. Therefore, in any situation, +// if we try to insert block_1 into the block cache, it will always fail. Only +// block_2 will be successfully inserted into the block cache.
+TEST_F(DBSecondaryCacheTest, SecondaryCacheFailureTest) { + LRUCacheOptions opts(4 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.paranoid_file_checks = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + fault_fs_->SetFailGetUniqueId(true); + DestroyAndReopen(options); + std::string session_id; + ASSERT_OK(db_->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + Random rnd(301); + const int N = 6; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + // After Flush is successful, RocksDB will do the paranoid check for the new + // SST file. Meta blocks are always cached in the block cache and they + // will not be evicted. When block_2 is a cache miss and is read out, it is + // inserted into the block cache. Note that block_1 is never successfully + // inserted into the block cache. There are 2 lookups in the secondary cache + // for block_1 and block_2. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + // Fail the insertion; in the LRU cache, the status returned by the secondary + // insertion is not checked, therefore the DB will not be influenced. + secondary_cache->InjectFailure(); + Compact("a", "z"); + // Compaction will create the iterator to scan the whole file. So all the + // blocks are needed. Meta blocks are always cached. When block_1 is read + // out, block_2 is evicted from the block cache and inserted into the secondary + // cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 3u); + + std::string v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // The first data block is not in the cache; similarly, this triggers the block + // cache Lookup and a secondary cache lookup for block_1. But block_1 will not + // be inserted successfully due to its size. Currently, the cache only has + // the meta blocks. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 4u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + // The second data block is not in the cache; similarly, this triggers the block + // cache Lookup and a secondary cache lookup for block_2, and block_2 is found + // in the secondary cache. Now the block cache has block_2. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 5u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + // block_2 is in the block cache. There is a block cache hit. No need to + // lookup or insert the secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 5u); + + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // Lookup the first data block: it is not in the block cache, so look up the + // secondary cache. It is not in the secondary cache either. After Get, + // block_1 still will not be cached.
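InjectFailure()/ResetInjectFailure(), used in this test, flip the test cache into a mode where every secondary insert reports an error; since the LRU cache ignores that status, reads keep succeeding and only the demoted copies are lost, which is what the unchanged num_inserts() counters assert. A rough sketch of the pattern follows; this is illustrative only, not the real TestSecondaryCache nor the exact SecondaryCache interface, and the size_cb/saveto_cb member names are assumptions.

#include <map>
#include <string>
#include "rocksdb/cache.h"

class FaultySecondaryCache {  // sketch of a fault-injecting secondary cache
 public:
  void InjectFailure() { inject_failure_ = true; }
  void ResetInjectFailure() { inject_failure_ = false; }

  // Called by the primary cache when it demotes an evicted entry. The caller
  // does not check this status, so a failure only loses the demoted copy.
  rocksdb::Status Insert(const rocksdb::Slice& key, void* value,
                         const rocksdb::Cache::CacheItemHelper* helper) {
    if (inject_failure_) {
      return rocksdb::Status::Corruption("Insertion Data Corrupted");
    }
    size_t size = (*helper->size_cb)(value);  // assumed member name
    std::string serialized(size, '\0');
    rocksdb::Status s =
        (*helper->saveto_cb)(value, 0, size, &serialized[0]);  // assumed
    if (s.ok()) {
      store_[key.ToString()] = std::move(serialized);
      num_inserts_++;
    }
    return s;
  }
  uint32_t num_inserts() const { return num_inserts_; }

 private:
  bool inject_failure_ = false;
  uint32_t num_inserts_ = 0;
  std::map<std::string, std::string> store_;
};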
+ ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 6u); + + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + // Lookup the first data block: it is not in the block cache, so look up the + // secondary cache. It is not in the secondary cache either. After Get, + // block_1 still will not be cached. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 7u); + secondary_cache->ResetInjectFailure(); + + Destroy(options); +} + +TEST_F(LRUSecondaryCacheTest, BasicWaitAllTest) { + LRUCacheOptions opts(1024, 2, false, 0.5, nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache = + std::make_shared<TestSecondaryCache>(32 * 1024); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + const int num_keys = 32; + + Random rnd(301); + std::vector<std::string> values; + for (int i = 0; i < num_keys; ++i) { + std::string str = rnd.RandomString(1020); + values.emplace_back(str); + TestItem* item = new TestItem(str.data(), str.length()); + ASSERT_OK(cache->Insert("k" + std::to_string(i), item, + &LRUSecondaryCacheTest::helper_, str.length())); + } + // Force all entries to be evicted to the secondary cache + cache->SetCapacity(0); + ASSERT_EQ(secondary_cache->num_inserts(), 32u); + cache->SetCapacity(32 * 1024); + + secondary_cache->SetResultMap( + {{"k3", TestSecondaryCache::ResultType::DEFER}, + {"k4", TestSecondaryCache::ResultType::DEFER_AND_FAIL}, + {"k5", TestSecondaryCache::ResultType::FAIL}}); + std::vector<Cache::Handle*> results; + for (int i = 0; i < 6; ++i) { + results.emplace_back( + cache->Lookup("k" + std::to_string(i), &LRUSecondaryCacheTest::helper_, + test_item_creator, Cache::Priority::LOW, false)); + } + cache->WaitAll(results); + for (int i = 0; i < 6; ++i) { + if (i == 4) { + ASSERT_EQ(cache->Value(results[i]), nullptr); + } else if (i == 5) { + ASSERT_EQ(results[i], nullptr); + continue; + } else { + TestItem* item = static_cast<TestItem*>(cache->Value(results[i])); + ASSERT_EQ(item->ToString(), values[i]); + } + cache->Release(results[i]); + } + + cache.reset(); + secondary_cache.reset(); +} + +// In this test, we have one KV pair per data block. We indirectly determine +// the cache key associated with each data block (and thus each KV) by using +// a sync point callback in TestSecondaryCache::Lookup. We then control the +// lookup result by setting the ResultMap. +TEST_F(DBSecondaryCacheTest, TestSecondaryCacheMultiGet) { + LRUCacheOptions opts(1 << 20, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + table_options.cache_index_and_filter_blocks = false; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.paranoid_file_checks = true; + DestroyAndReopen(options); + Random rnd(301); + const int N = 8; + std::vector<std::string> keys; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(4000); + keys.emplace_back(p_v); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + // After Flush is successful, RocksDB does the paranoid check for the new + // SST file. This will try to look up all data blocks in the secondary + // cache.
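BasicWaitAllTest above exercises the asynchronous half of this API: Lookup() with wait=false returns immediately, and WaitAll() later resolves the pending handles in one batch. A condensed sketch of that calling pattern, with keys_to_probe, helper, and create_cb standing in for the test's fixtures (all three are hypothetical names):

std::vector<rocksdb::Cache::Handle*> handles;
for (const std::string& key : keys_to_probe) {  // hypothetical key list
  // wait=false: a miss in the primary cache starts a secondary cache
  // lookup but does not block on it.
  handles.emplace_back(cache->Lookup(key, &helper, create_cb,
                                     rocksdb::Cache::Priority::LOW,
                                     /*wait=*/false));
}
cache->WaitAll(handles);  // resolve every deferred lookup at once
for (rocksdb::Cache::Handle* h : handles) {
  if (h == nullptr) {
    continue;  // FAIL-style results never produce a handle
  }
  if (cache->Value(h) != nullptr) {
    // ... use the value; DEFER_AND_FAIL-style results leave it null ...
  }
  cache->Release(h);
}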
+ ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 8u); + + cache->SetCapacity(0); + ASSERT_EQ(secondary_cache->num_inserts(), 8u); + cache->SetCapacity(1 << 20); + + std::vector<std::string> cache_keys; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "TestSecondaryCache::Lookup", [&cache_keys](void* key) -> void { + cache_keys.emplace_back(*(static_cast<std::string*>(key))); + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + for (int i = 0; i < N; ++i) { + std::string v = Get(Key(i)); + ASSERT_EQ(4000, v.size()); + ASSERT_EQ(v, keys[i]); + } + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ASSERT_EQ(secondary_cache->num_lookups(), 16u); + cache->SetCapacity(0); + cache->SetCapacity(1 << 20); + + ASSERT_EQ(Get(Key(2)), keys[2]); + ASSERT_EQ(Get(Key(7)), keys[7]); + secondary_cache->SetResultMap( + {{cache_keys[3], TestSecondaryCache::ResultType::DEFER}, + {cache_keys[4], TestSecondaryCache::ResultType::DEFER_AND_FAIL}, + {cache_keys[5], TestSecondaryCache::ResultType::FAIL}}); + + std::vector<std::string> mget_keys( + {Key(0), Key(1), Key(2), Key(3), Key(4), Key(5), Key(6), Key(7)}); + std::vector<PinnableSlice> values(mget_keys.size()); + std::vector<Status> s(keys.size()); + std::vector<Slice> key_slices; + for (const std::string& key : mget_keys) { + key_slices.emplace_back(key); + } + uint32_t num_lookups = secondary_cache->num_lookups(); + dbfull()->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), + key_slices.size(), key_slices.data(), values.data(), + s.data(), false); + ASSERT_EQ(secondary_cache->num_lookups(), num_lookups + 5); + for (int i = 0; i < N; ++i) { + ASSERT_OK(s[i]); + ASSERT_EQ(values[i].ToString(), keys[i]); + values[i].Reset(); + } + Destroy(options); +} + +class LRUCacheWithStat : public LRUCache { + public: + LRUCacheWithStat( + size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, + double _high_pri_pool_ratio, + std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr, + bool _use_adaptive_mutex = kDefaultToAdaptiveMutex, + CacheMetadataChargePolicy _metadata_charge_policy = + kDontChargeCacheMetadata, + const std::shared_ptr<SecondaryCache>& _secondary_cache = nullptr) + : LRUCache(_capacity, _num_shard_bits, _strict_capacity_limit, + _high_pri_pool_ratio, _memory_allocator, _use_adaptive_mutex, + _metadata_charge_policy, _secondary_cache) { + insert_count_ = 0; + lookup_count_ = 0; + } + ~LRUCacheWithStat() {} + + Status Insert(const Slice& key, void* value, size_t charge, DeleterFn deleter, + Handle** handle, Priority priority) override { + insert_count_++; + return LRUCache::Insert(key, value, charge, deleter, handle, priority); + } + Status Insert(const Slice& key, void* value, const CacheItemHelper* helper, + size_t chargge, Handle** handle = nullptr, + Priority priority = Priority::LOW) override { + insert_count_++; + return LRUCache::Insert(key, value, helper, chargge, handle, priority); + } + Handle* Lookup(const Slice& key, Statistics* stats) override { + lookup_count_++; + return LRUCache::Lookup(key, stats); + } + Handle* Lookup(const Slice& key, const CacheItemHelper* helper, + const CreateCallback& create_cb, Priority priority, bool wait, + Statistics* stats = nullptr) override { + lookup_count_++; + return LRUCache::Lookup(key, helper, create_cb, priority, wait, stats); + } + + uint32_t GetInsertCount() { return insert_count_; } + uint32_t GetLookupcount() { return lookup_count_; } + void ResetCount() { + insert_count_ = 0; + lookup_count_ = 0; + } + + private: + uint32_t insert_count_; + uint32_t lookup_count_; +}; +
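The wrapper above only counts calls; everything else is forwarded to LRUCache. Its intended use, as the tests that follow apply it, is to bracket a workload with counter reads. A small usage sketch under those assumptions (the argument values are arbitrary):

LRUCacheWithStat* raw = new LRUCacheWithStat(
    /*_capacity=*/1024 * 1024, /*_num_shard_bits=*/0,
    /*_strict_capacity_limit=*/false, /*_high_pri_pool_ratio=*/0.5);
std::shared_ptr<Cache> cache(raw);  // the shared_ptr owns it from here on

uint32_t inserts_before = raw->GetInsertCount();
uint32_t lookups_before = raw->GetLookupcount();
// ... drive reads and writes through the Cache interface ...
uint32_t inserts = raw->GetInsertCount() - inserts_before;
uint32_t lookups = raw->GetLookupcount() - lookups_before;
// 'inserts' and 'lookups' now isolate the bracketed workload, which is how
// the dump/load tests below compute dump_insert and dump_lookup.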
+#ifndef ROCKSDB_LITE + +TEST_F(DBSecondaryCacheTest, LRUCacheDumpLoadBasic) { + LRUCacheOptions cache_opts(1024 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + LRUCacheWithStat* tmp_cache = new LRUCacheWithStat( + cache_opts.capacity, cache_opts.num_shard_bits, + cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio, + cache_opts.memory_allocator, cache_opts.use_adaptive_mutex, + cache_opts.metadata_charge_policy, cache_opts.secondary_cache); + std::shared_ptr<Cache> cache(tmp_cache); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + DestroyAndReopen(options); + fault_fs_->SetFailGetUniqueId(true); + + Random rnd(301); + const int N = 256; + std::vector<std::string> value; + char buf[1000]; + memset(buf, 'a', 1000); + value.resize(N); + for (int i = 0; i < N; i++) { + // std::string p_v = rnd.RandomString(1000); + std::string p_v(buf, 1000); + value[i] = p_v; + ASSERT_OK(Put(Key(i), p_v)); + } + ASSERT_OK(Flush()); + Compact("a", "z"); + + // do the read for all the key value pairs, so all the blocks should be in + // cache + uint32_t start_insert = tmp_cache->GetInsertCount(); + uint32_t start_lookup = tmp_cache->GetLookupcount(); + std::string v; + for (int i = 0; i < N; i++) { + v = Get(Key(i)); + ASSERT_EQ(v, value[i]); + } + uint32_t dump_insert = tmp_cache->GetInsertCount() - start_insert; + uint32_t dump_lookup = tmp_cache->GetLookupcount() - start_lookup; + ASSERT_EQ(63, + static_cast<int>(dump_insert)); // the insert in the block cache + ASSERT_EQ(256, + static_cast<int>(dump_lookup)); // the lookup in the block cache + // We have enough blocks in the block cache + + CacheDumpOptions cd_options; + cd_options.clock = fault_env_->GetSystemClock().get(); + std::string dump_path = db_->GetName() + "/cache_dump"; + std::unique_ptr<CacheDumpWriter> dump_writer; + Status s = NewToFileCacheDumpWriter(fault_fs_, FileOptions(), dump_path, + &dump_writer); + ASSERT_OK(s); + std::unique_ptr<CacheDumper> cache_dumper; + s = NewDefaultCacheDumper(cd_options, cache, std::move(dump_writer), + &cache_dumper); + ASSERT_OK(s); + std::vector<DB*> db_list; + db_list.push_back(db_); + s = cache_dumper->SetDumpFilter(db_list); + ASSERT_OK(s); + s = cache_dumper->DumpCacheEntriesToWriter(); + ASSERT_OK(s); + cache_dumper.reset(); + + // We have a new cache and it is empty; then, before we do the Get, we do the + // dump load + std::shared_ptr<TestSecondaryCache> secondary_cache = + std::make_shared<TestSecondaryCache>(2048 * 1024); + cache_opts.secondary_cache = secondary_cache; + tmp_cache = new LRUCacheWithStat( + cache_opts.capacity, cache_opts.num_shard_bits, + cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio, + cache_opts.memory_allocator, cache_opts.use_adaptive_mutex, + cache_opts.metadata_charge_policy, cache_opts.secondary_cache); + std::shared_ptr<Cache> cache_new(tmp_cache); + table_options.block_cache = cache_new; + table_options.block_size = 4 * 1024; + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + + // start to load the data into the new block cache + start_insert = secondary_cache->num_inserts(); + start_lookup = secondary_cache->num_lookups(); + std::unique_ptr<CacheDumpReader> dump_reader; + s = NewFromFileCacheDumpReader(fault_fs_, FileOptions(), dump_path, + &dump_reader); + ASSERT_OK(s); + std::unique_ptr<CacheDumpedLoader>
cache_loader; + s = NewDefaultCacheDumpedLoader(cd_options, table_options, secondary_cache, + std::move(dump_reader), &cache_loader); + ASSERT_OK(s); + s = cache_loader->RestoreCacheEntriesToSecondaryCache(); + ASSERT_OK(s); + uint32_t load_insert = secondary_cache->num_inserts() - start_insert; + uint32_t load_lookup = secondary_cache->num_lookups() - start_lookup; + // check the number we inserted + ASSERT_EQ(64, static_cast<int>(load_insert)); + ASSERT_EQ(0, static_cast<int>(load_lookup)); + ASSERT_OK(s); + + Reopen(options); + + // After load, we do the Get again + start_insert = secondary_cache->num_inserts(); + start_lookup = secondary_cache->num_lookups(); + uint32_t cache_insert = tmp_cache->GetInsertCount(); + uint32_t cache_lookup = tmp_cache->GetLookupcount(); + for (int i = 0; i < N; i++) { + v = Get(Key(i)); + ASSERT_EQ(v, value[i]); + } + uint32_t final_insert = secondary_cache->num_inserts() - start_insert; + uint32_t final_lookup = secondary_cache->num_lookups() - start_lookup; + // no insert to secondary cache + ASSERT_EQ(0, static_cast<int>(final_insert)); + // lookup the secondary to get all blocks + ASSERT_EQ(64, static_cast<int>(final_lookup)); + uint32_t block_insert = tmp_cache->GetInsertCount() - cache_insert; + uint32_t block_lookup = tmp_cache->GetLookupcount() - cache_lookup; + // Check the new block cache insert and lookup, should be no insert since all + // blocks are from the secondary cache. + ASSERT_EQ(0, static_cast<int>(block_insert)); + ASSERT_EQ(256, static_cast<int>(block_lookup)); + + fault_fs_->SetFailGetUniqueId(false); + Destroy(options); +} + +TEST_F(DBSecondaryCacheTest, LRUCacheDumpLoadWithFilter) { + LRUCacheOptions cache_opts(1024 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + LRUCacheWithStat* tmp_cache = new LRUCacheWithStat( + cache_opts.capacity, cache_opts.num_shard_bits, + cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio, + cache_opts.memory_allocator, cache_opts.use_adaptive_mutex, + cache_opts.metadata_charge_policy, cache_opts.secondary_cache); + std::shared_ptr<Cache> cache(tmp_cache); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + std::string dbname1 = test::PerThreadDBPath("db_1"); + ASSERT_OK(DestroyDB(dbname1, options)); + DB* db1 = nullptr; + ASSERT_OK(DB::Open(options, dbname1, &db1)); + std::string dbname2 = test::PerThreadDBPath("db_2"); + ASSERT_OK(DestroyDB(dbname2, options)); + DB* db2 = nullptr; + ASSERT_OK(DB::Open(options, dbname2, &db2)); + fault_fs_->SetFailGetUniqueId(true); + + // write the KVs to db1 + Random rnd(301); + const int N = 256; + std::vector<std::string> value1; + WriteOptions wo; + char buf[1000]; + memset(buf, 'a', 1000); + value1.resize(N); + for (int i = 0; i < N; i++) { + std::string p_v(buf, 1000); + value1[i] = p_v; + ASSERT_OK(db1->Put(wo, Key(i), p_v)); + } + ASSERT_OK(db1->Flush(FlushOptions())); + Slice bg("a"); + Slice ed("b"); + ASSERT_OK(db1->CompactRange(CompactRangeOptions(), &bg, &ed)); + + // Write the KVs to DB2 + std::vector<std::string> value2; + memset(buf, 'b', 1000); + value2.resize(N); + for (int i = 0; i < N; i++) { + std::string p_v(buf, 1000); + value2[i] = p_v; + ASSERT_OK(db2->Put(wo, Key(i), p_v)); + } + ASSERT_OK(db2->Flush(FlushOptions())); + ASSERT_OK(db2->CompactRange(CompactRangeOptions(), &bg, &ed)); + + // do the read for
all the key value pairs, so all the blocks should be in + // cache + uint32_t start_insert = tmp_cache->GetInsertCount(); + uint32_t start_lookup = tmp_cache->GetLookupcount(); + ReadOptions ro; + std::string v; + for (int i = 0; i < N; i++) { + ASSERT_OK(db1->Get(ro, Key(i), &v)); + ASSERT_EQ(v, value1[i]); + } + for (int i = 0; i < N; i++) { + ASSERT_OK(db2->Get(ro, Key(i), &v)); + ASSERT_EQ(v, value2[i]); + } + uint32_t dump_insert = tmp_cache->GetInsertCount() - start_insert; + uint32_t dump_lookup = tmp_cache->GetLookupcount() - start_lookup; + ASSERT_EQ(128, + static_cast<int>(dump_insert)); // the insert in the block cache + ASSERT_EQ(512, + static_cast<int>(dump_lookup)); // the lookup in the block cache + // We have enough blocks in the block cache + + CacheDumpOptions cd_options; + cd_options.clock = fault_env_->GetSystemClock().get(); + std::string dump_path = db1->GetName() + "/cache_dump"; + std::unique_ptr<CacheDumpWriter> dump_writer; + Status s = NewToFileCacheDumpWriter(fault_fs_, FileOptions(), dump_path, + &dump_writer); + ASSERT_OK(s); + std::unique_ptr<CacheDumper> cache_dumper; + s = NewDefaultCacheDumper(cd_options, cache, std::move(dump_writer), + &cache_dumper); + ASSERT_OK(s); + std::vector<DB*> db_list; + db_list.push_back(db1); + s = cache_dumper->SetDumpFilter(db_list); + ASSERT_OK(s); + s = cache_dumper->DumpCacheEntriesToWriter(); + ASSERT_OK(s); + cache_dumper.reset(); + + // We have a new cache and it is empty; then, before we do the Get, we do the + // dump load + std::shared_ptr<TestSecondaryCache> secondary_cache = + std::make_shared<TestSecondaryCache>(2048 * 1024); + cache_opts.secondary_cache = secondary_cache; + tmp_cache = new LRUCacheWithStat( + cache_opts.capacity, cache_opts.num_shard_bits, + cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio, + cache_opts.memory_allocator, cache_opts.use_adaptive_mutex, + cache_opts.metadata_charge_policy, cache_opts.secondary_cache); + std::shared_ptr<Cache> cache_new(tmp_cache); + table_options.block_cache = cache_new; + table_options.block_size = 4 * 1024; + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + + // Start the cache loading process + start_insert = secondary_cache->num_inserts(); + start_lookup = secondary_cache->num_lookups(); + std::unique_ptr<CacheDumpReader> dump_reader; + s = NewFromFileCacheDumpReader(fault_fs_, FileOptions(), dump_path, + &dump_reader); + ASSERT_OK(s); + std::unique_ptr<CacheDumpedLoader> cache_loader; + s = NewDefaultCacheDumpedLoader(cd_options, table_options, secondary_cache, + std::move(dump_reader), &cache_loader); + ASSERT_OK(s); + s = cache_loader->RestoreCacheEntriesToSecondaryCache(); + ASSERT_OK(s); + uint32_t load_insert = secondary_cache->num_inserts() - start_insert; + uint32_t load_lookup = secondary_cache->num_lookups() - start_lookup; + // check the number we inserted + ASSERT_EQ(64, static_cast<int>(load_insert)); + ASSERT_EQ(0, static_cast<int>(load_lookup)); + ASSERT_OK(s); + + ASSERT_OK(db1->Close()); + delete db1; + ASSERT_OK(DB::Open(options, dbname1, &db1)); + + // After load, we do the Get again. To validate the cache, we do not allow any + // I/O, so we set the file system to be inactive.
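For reference, the dump-and-restore choreography that both tests above walk through reduces to two short pipelines built from the factory functions quoted in this diff. A sketch with error handling elided; fs, env, dump_path, cache, table_options, secondary_cache, and db are assumed to be in scope:

// Dump: snapshot the live block cache into a file.
CacheDumpOptions cd_options;
cd_options.clock = env->GetSystemClock().get();
std::unique_ptr<CacheDumpWriter> writer;
ASSERT_OK(NewToFileCacheDumpWriter(fs, FileOptions(), dump_path, &writer));
std::unique_ptr<CacheDumper> dumper;
ASSERT_OK(NewDefaultCacheDumper(cd_options, cache, std::move(writer), &dumper));
ASSERT_OK(dumper->SetDumpFilter({db}));  // keep only this DB's entries
ASSERT_OK(dumper->DumpCacheEntriesToWriter());

// Load: replay the file into a (possibly fresh) secondary cache.
std::unique_ptr<CacheDumpReader> reader;
ASSERT_OK(NewFromFileCacheDumpReader(fs, FileOptions(), dump_path, &reader));
std::unique_ptr<CacheDumpedLoader> loader;
ASSERT_OK(NewDefaultCacheDumpedLoader(cd_options, table_options,
                                      secondary_cache, std::move(reader),
                                      &loader));
ASSERT_OK(loader->RestoreCacheEntriesToSecondaryCache());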
+ IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); + fault_fs_->SetFilesystemActive(false, error_msg); + start_insert = secondary_cache->num_inserts(); + start_lookup = secondary_cache->num_lookups(); + uint32_t cache_insert = tmp_cache->GetInsertCount(); + uint32_t cache_lookup = tmp_cache->GetLookupcount(); + for (int i = 0; i < N; i++) { + ASSERT_OK(db1->Get(ro, Key(i), &v)); + ASSERT_EQ(v, value1[i]); + } + uint32_t final_insert = secondary_cache->num_inserts() - start_insert; + uint32_t final_lookup = secondary_cache->num_lookups() - start_lookup; + // no insert to secondary cache + ASSERT_EQ(0, static_cast<int>(final_insert)); + // lookup the secondary to get all blocks + ASSERT_EQ(64, static_cast<int>(final_lookup)); + uint32_t block_insert = tmp_cache->GetInsertCount() - cache_insert; + uint32_t block_lookup = tmp_cache->GetLookupcount() - cache_lookup; + // Check the new block cache insert and lookup, should be no insert since all + // blocks are from the secondary cache. + ASSERT_EQ(0, static_cast<int>(block_insert)); + ASSERT_EQ(256, static_cast<int>(block_lookup)); + fault_fs_->SetFailGetUniqueId(false); + fault_fs_->SetFilesystemActive(true); + delete db1; + delete db2; + ASSERT_OK(DestroyDB(dbname1, options)); + ASSERT_OK(DestroyDB(dbname2, options)); +} + +// Test the option not to use the secondary cache in a certain DB. +TEST_F(DBSecondaryCacheTest, TestSecondaryCacheOptionBasic) { + LRUCacheOptions opts(4 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + fault_fs_->SetFailGetUniqueId(true); + options.lowest_used_cache_tier = CacheTier::kVolatileTier; + + // Set the file paranoid check, so after flush, the file will be read and + // all the blocks will be accessed. + options.paranoid_file_checks = true; + DestroyAndReopen(options); + std::string session_id; + ASSERT_OK(db_->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + Random rnd(301); + const int N = 6; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(Put(Key(i + 70), p_v)); + } + + ASSERT_OK(Flush()); + + // Flush will trigger the paranoid check and read blocks. But only block cache + // will be read. No operations for secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + Compact("a", "z"); + + // Compaction will also insert and evict blocks, no operations to the block + // cache. No operations for secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + std::string v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + + // Check the data in the first block. Cache miss, directly read from SST file. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + + // Check the second block.
+ ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + + // block cache hit + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + v = Get(Key(70)); + ASSERT_EQ(1007, v.size()); + + // Check the first block in the second SST file. Cache miss and trigger SST + // file read. No operations for secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + v = Get(Key(75)); + ASSERT_EQ(1007, v.size()); + + // Check the second block in the second SST file. Cache miss and trigger SST + // file read. No operations for secondary cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + Destroy(options); +} + +// We disable the secondary cache in DBOptions at first. Close and reopen the DB +// with new options, which set the lowest_used_cache_tier to +// kNonVolatileBlockTier. So the secondary cache will be used. +TEST_F(DBSecondaryCacheTest, TestSecondaryCacheOptionChange) { + LRUCacheOptions opts(4 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + fault_fs_->SetFailGetUniqueId(true); + options.lowest_used_cache_tier = CacheTier::kVolatileTier; + + // Set the file paranoid check, so after flush, the file will be read and + // all the blocks will be accessed. + options.paranoid_file_checks = true; + DestroyAndReopen(options); + std::string session_id; + ASSERT_OK(db_->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + Random rnd(301); + const int N = 6; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(Put(Key(i + 70), p_v)); + } + + ASSERT_OK(Flush()); + + // Flush will trigger the paranoid check and read blocks. But only block cache + // will be read. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + Compact("a", "z"); + + // Compaction will also insert and evict blocks, no operations to the block + // cache. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + std::string v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + + // Check the data in the first block. Cache miss, directly read from SST file. + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + + // Check the second block.
+ ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + + // block cache hit + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + + // Change the option to enable the secondary cache after we Reopen the DB + options.lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier; + Reopen(options); + + v = Get(Key(70)); + ASSERT_EQ(1007, v.size()); + + // Enable the secondary cache, trigger lookup of the first block in second SST + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 1u); + + v = Get(Key(75)); + ASSERT_EQ(1007, v.size()); + + // trigger lookup of the second block in second SST + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + Destroy(options); +} + +// Two DB test. We create 2 DBs sharing the same block cache and secondary +// cache. We disable the secondary cache option for DB2. +TEST_F(DBSecondaryCacheTest, TestSecondaryCacheOptionTwoDB) { + LRUCacheOptions opts(4 * 1024, 0, false, 0.5, nullptr, + kDefaultToAdaptiveMutex, kDontChargeCacheMetadata); + std::shared_ptr<TestSecondaryCache> secondary_cache( + new TestSecondaryCache(2048 * 1024)); + opts.secondary_cache = secondary_cache; + std::shared_ptr<Cache> cache = NewLRUCache(opts); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + table_options.block_size = 4 * 1024; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = fault_env_.get(); + options.paranoid_file_checks = true; + std::string dbname1 = test::PerThreadDBPath("db_t_1"); + ASSERT_OK(DestroyDB(dbname1, options)); + DB* db1 = nullptr; + ASSERT_OK(DB::Open(options, dbname1, &db1)); + std::string dbname2 = test::PerThreadDBPath("db_t_2"); + ASSERT_OK(DestroyDB(dbname2, options)); + DB* db2 = nullptr; + Options options2 = options; + options2.lowest_used_cache_tier = CacheTier::kVolatileTier; + ASSERT_OK(DB::Open(options2, dbname2, &db2)); + fault_fs_->SetFailGetUniqueId(true); + + // Set the file paranoid check, so after flush, the file will be read and + // all the blocks will be accessed.
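These three tests all pivot on one knob, Options::lowest_used_cache_tier, as quoted from the diff. A compressed sketch of the two settings they contrast; the Cache object itself, including its attached secondary_cache, stays the same in both cases:

Options options = GetDefaultOptions();
// Volatile tier only: lookups stop at the in-memory block cache, so the
// attached secondary cache sees no inserts and no lookups.
options.lowest_used_cache_tier = CacheTier::kVolatileTier;
// Non-volatile block tier: block cache misses continue into the secondary
// cache; flipping this across a Reopen() re-enables it, as the
// TestSecondaryCacheOptionChange test above demonstrates.
options.lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier;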
+ std::string session_id; + ASSERT_OK(db1->GetDbSessionId(session_id)); + secondary_cache->SetDbSessionId(session_id); + + WriteOptions wo; + Random rnd(301); + const int N = 6; + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(db1->Put(wo, Key(i), p_v)); + } + + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 0u); + ASSERT_OK(db1->Flush(FlushOptions())); + + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + for (int i = 0; i < N; i++) { + std::string p_v = rnd.RandomString(1007); + ASSERT_OK(db2->Put(wo, Key(i), p_v)); + } + + // No change in the secondary cache, since it is disabled in DB2 + ASSERT_EQ(secondary_cache->num_inserts(), 0u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + ASSERT_OK(db2->Flush(FlushOptions())); + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + Slice bg("a"); + Slice ed("b"); + ASSERT_OK(db1->CompactRange(CompactRangeOptions(), &bg, &ed)); + ASSERT_OK(db2->CompactRange(CompactRangeOptions(), &bg, &ed)); + + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 2u); + + ReadOptions ro; + std::string v; + ASSERT_OK(db1->Get(ro, Key(0), &v)); + ASSERT_EQ(1007, v.size()); + + // DB1 has looked up block 1 and it is a miss in the block cache, triggering a + // secondary cache lookup + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 3u); + + ASSERT_OK(db1->Get(ro, Key(5), &v)); + ASSERT_EQ(1007, v.size()); + + // DB1 looks up the second block and it is a miss in the block cache, triggering a + // secondary cache lookup + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 4u); + + ASSERT_OK(db2->Get(ro, Key(0), &v)); + ASSERT_EQ(1007, v.size()); + + // For db2, the secondary cache is not enabled, so there is no search in the + // secondary cache + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 4u); + + ASSERT_OK(db2->Get(ro, Key(5), &v)); + ASSERT_EQ(1007, v.size()); + + // For db2, the secondary cache is not enabled, so there is no search in the + // secondary cache + ASSERT_EQ(secondary_cache->num_inserts(), 1u); + ASSERT_EQ(secondary_cache->num_lookups(), 4u); + + fault_fs_->SetFailGetUniqueId(false); + fault_fs_->SetFilesystemActive(true); + delete db1; + delete db2; + ASSERT_OK(DestroyDB(dbname1, options)); + ASSERT_OK(DestroyDB(dbname2, options)); +} + +#endif // ROCKSDB_LITE + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/sharded_cache.cc mariadb-10.11.13/storage/rocksdb/rocksdb/cache/sharded_cache.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/sharded_cache.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/sharded_cache.cc 2025-05-19 16:14:27.000000000 +0000 @@ -9,53 +9,96 @@ #include "cache/sharded_cache.h" -#include +#include +#include +#include "util/hash.h" +#include "util/math.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { +namespace { + +inline uint32_t HashSlice(const Slice& s) { + return Lower32of64(GetSliceNPHash64(s)); +} + +} // namespace + ShardedCache::ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, std::shared_ptr<MemoryAllocator> allocator) : Cache(std::move(allocator)), - num_shard_bits_(num_shard_bits), + shard_mask_((uint32_t{1} << num_shard_bits) - 1), capacity_(capacity),
strict_capacity_limit_(strict_capacity_limit), last_id_(1) {} void ShardedCache::SetCapacity(size_t capacity) { - int num_shards = 1 << num_shard_bits_; + uint32_t num_shards = GetNumShards(); const size_t per_shard = (capacity + (num_shards - 1)) / num_shards; MutexLock l(&capacity_mutex_); - for (int s = 0; s < num_shards; s++) { + for (uint32_t s = 0; s < num_shards; s++) { GetShard(s)->SetCapacity(per_shard); } capacity_ = capacity; } void ShardedCache::SetStrictCapacityLimit(bool strict_capacity_limit) { - int num_shards = 1 << num_shard_bits_; + uint32_t num_shards = GetNumShards(); MutexLock l(&capacity_mutex_); - for (int s = 0; s < num_shards; s++) { + for (uint32_t s = 0; s < num_shards; s++) { GetShard(s)->SetStrictCapacityLimit(strict_capacity_limit); } strict_capacity_limit_ = strict_capacity_limit; } Status ShardedCache::Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value), - Handle** handle, Priority priority) { + DeleterFn deleter, Handle** handle, + Priority priority) { uint32_t hash = HashSlice(key); return GetShard(Shard(hash)) ->Insert(key, hash, value, charge, deleter, handle, priority); } +Status ShardedCache::Insert(const Slice& key, void* value, + const CacheItemHelper* helper, size_t charge, + Handle** handle, Priority priority) { + uint32_t hash = HashSlice(key); + if (!helper) { + return Status::InvalidArgument(); + } + return GetShard(Shard(hash)) + ->Insert(key, hash, value, helper, charge, handle, priority); +} + Cache::Handle* ShardedCache::Lookup(const Slice& key, Statistics* /*stats*/) { uint32_t hash = HashSlice(key); return GetShard(Shard(hash))->Lookup(key, hash); } +Cache::Handle* ShardedCache::Lookup(const Slice& key, + const CacheItemHelper* helper, + const CreateCallback& create_cb, + Priority priority, bool wait, + Statistics* stats) { + uint32_t hash = HashSlice(key); + return GetShard(Shard(hash)) + ->Lookup(key, hash, helper, create_cb, priority, wait, stats); +} + +bool ShardedCache::IsReady(Handle* handle) { + uint32_t hash = GetHash(handle); + return GetShard(Shard(hash))->IsReady(handle); +} + +void ShardedCache::Wait(Handle* handle) { + uint32_t hash = GetHash(handle); + GetShard(Shard(hash))->Wait(handle); +} + bool ShardedCache::Ref(Handle* handle) { uint32_t hash = GetHash(handle); return GetShard(Shard(hash))->Ref(handle); @@ -66,6 +109,11 @@ return GetShard(Shard(hash))->Release(handle, force_erase); } +bool ShardedCache::Release(Handle* handle, bool useful, bool force_erase) { + uint32_t hash = GetHash(handle); + return GetShard(Shard(hash))->Release(handle, useful, force_erase); +} + void ShardedCache::Erase(const Slice& key) { uint32_t hash = HashSlice(key); GetShard(Shard(hash))->Erase(key, hash); @@ -87,9 +135,9 @@ size_t ShardedCache::GetUsage() const { // We will not lock the cache when getting the usage from shards. - int num_shards = 1 << num_shard_bits_; + uint32_t num_shards = GetNumShards(); size_t usage = 0; - for (int s = 0; s < num_shards; s++) { + for (uint32_t s = 0; s < num_shards; s++) { usage += GetShard(s)->GetUsage(); } return usage; @@ -101,25 +149,42 @@ size_t ShardedCache::GetPinnedUsage() const { // We will not lock the cache when getting the usage from shards. 
- int num_shards = 1 << num_shard_bits_; + uint32_t num_shards = GetNumShards(); size_t usage = 0; - for (int s = 0; s < num_shards; s++) { + for (uint32_t s = 0; s < num_shards; s++) { usage += GetShard(s)->GetPinnedUsage(); } return usage; } -void ShardedCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) { - int num_shards = 1 << num_shard_bits_; - for (int s = 0; s < num_shards; s++) { - GetShard(s)->ApplyToAllCacheEntries(callback, thread_safe); - } +void ShardedCache::ApplyToAllEntries( + const std::function<void(const Slice& key, void* value, size_t charge, DeleterFn deleter)>& callback, + const ApplyToAllEntriesOptions& opts) { + uint32_t num_shards = GetNumShards(); + // Iterate over part of each shard, rotating between shards, to + // minimize impact on latency of concurrent operations. + std::unique_ptr<uint32_t[]> states(new uint32_t[num_shards]{}); + + uint32_t aepl_in_32 = static_cast<uint32_t>( + std::min(size_t{UINT32_MAX}, opts.average_entries_per_lock)); + aepl_in_32 = std::min(aepl_in_32, uint32_t{1}); + + bool remaining_work; + do { + remaining_work = false; + for (uint32_t s = 0; s < num_shards; s++) { + if (states[s] != UINT32_MAX) { + GetShard(s)->ApplyToSomeEntries(callback, aepl_in_32, &states[s]); + remaining_work |= states[s] != UINT32_MAX; + } + } + } while (remaining_work); } void ShardedCache::EraseUnRefEntries() { - int num_shards = 1 << num_shard_bits_; - for (int s = 0; s < num_shards; s++) { + uint32_t num_shards = GetNumShards(); + for (uint32_t s = 0; s < num_shards; s++) { GetShard(s)->EraseUnRefEntries(); } } @@ -134,7 +199,8 @@ snprintf(buffer, kBufferSize, " capacity : %" ROCKSDB_PRIszt "\n", capacity_); ret.append(buffer); - snprintf(buffer, kBufferSize, " num_shard_bits : %d\n", num_shard_bits_); + snprintf(buffer, kBufferSize, " num_shard_bits : %d\n", + GetNumShardBits()); ret.append(buffer); snprintf(buffer, kBufferSize, " strict_capacity_limit : %d\n", strict_capacity_limit_); @@ -159,4 +225,8 @@ return num_shard_bits; } +int ShardedCache::GetNumShardBits() const { return BitsSetToOne(shard_mask_); } + +uint32_t ShardedCache::GetNumShards() const { return shard_mask_ + 1; } + } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cache/sharded_cache.h mariadb-10.11.13/storage/rocksdb/rocksdb/cache/sharded_cache.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/cache/sharded_cache.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cache/sharded_cache.h 2025-05-19 16:14:27.000000000 +0000 @@ -14,7 +14,6 @@ #include "port/port.h" #include "rocksdb/cache.h" -#include "util/hash.h" namespace ROCKSDB_NAMESPACE { @@ -24,20 +23,38 @@ CacheShard() = default; virtual ~CacheShard() = default; + using DeleterFn = Cache::DeleterFn; virtual Status Insert(const Slice& key, uint32_t hash, void* value, - size_t charge, - void (*deleter)(const Slice& key, void* value), + size_t charge, DeleterFn deleter, + Cache::Handle** handle, Cache::Priority priority) = 0; + virtual Status Insert(const Slice& key, uint32_t hash, void* value, + const Cache::CacheItemHelper* helper, size_t charge, Cache::Handle** handle, Cache::Priority priority) = 0; virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0; + virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash, + const Cache::CacheItemHelper* helper, + const Cache::CreateCallback& create_cb, + Cache::Priority priority, bool wait, + Statistics* stats) = 0; + virtual bool Release(Cache::Handle* handle, bool useful, + bool force_erase) = 0; + virtual bool IsReady(Cache::Handle* handle) = 0; + virtual void
Wait(Cache::Handle* handle) = 0; virtual bool Ref(Cache::Handle* handle) = 0; - virtual bool Release(Cache::Handle* handle, bool force_erase = false) = 0; + virtual bool Release(Cache::Handle* handle, bool force_erase) = 0; virtual void Erase(const Slice& key, uint32_t hash) = 0; virtual void SetCapacity(size_t capacity) = 0; virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0; virtual size_t GetUsage() const = 0; virtual size_t GetPinnedUsage() const = 0; - virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) = 0; + // Handles iterating over roughly `average_entries_per_lock` entries, using + // `state` to somehow record where it last ended up. Caller initially uses + // *state == 0 and implementation sets *state = UINT32_MAX to indicate + // completion. + virtual void ApplyToSomeEntries( + const std::function<void(const Slice& key, void* value, size_t charge, DeleterFn deleter)>& callback, + uint32_t average_entries_per_lock, uint32_t* state) = 0; virtual void EraseUnRefEntries() = 0; virtual std::string GetPrintableOptions() const { return ""; } void set_metadata_charge_policy( @@ -57,22 +74,29 @@ ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, std::shared_ptr<MemoryAllocator> memory_allocator = nullptr); virtual ~ShardedCache() = default; - virtual const char* Name() const override = 0; - virtual CacheShard* GetShard(int shard) = 0; - virtual const CacheShard* GetShard(int shard) const = 0; - virtual void* Value(Handle* handle) override = 0; - virtual size_t GetCharge(Handle* handle) const override = 0; + virtual CacheShard* GetShard(uint32_t shard) = 0; + virtual const CacheShard* GetShard(uint32_t shard) const = 0; virtual uint32_t GetHash(Handle* handle) const = 0; - virtual void DisownData() override = 0; virtual void SetCapacity(size_t capacity) override; virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override; virtual Status Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value), - Handle** handle, Priority priority) override; + DeleterFn deleter, Handle** handle, + Priority priority) override; + virtual Status Insert(const Slice& key, void* value, + const CacheItemHelper* helper, size_t chargge, + Handle** handle = nullptr, + Priority priority = Priority::LOW) override; virtual Handle* Lookup(const Slice& key, Statistics* stats) override; + virtual Handle* Lookup(const Slice& key, const CacheItemHelper* helper, + const CreateCallback& create_cb, Priority priority, + bool wait, Statistics* stats = nullptr) override; + virtual bool Release(Handle* handle, bool useful, + bool force_erase = false) override; + virtual bool IsReady(Handle* handle) override; + virtual void Wait(Handle* handle) override; virtual bool Ref(Handle* handle) override; virtual bool Release(Handle* handle, bool force_erase = false) override; virtual void Erase(const Slice& key) override; @@ -82,24 +106,21 @@ virtual size_t GetUsage() const override; virtual size_t GetUsage(Handle* handle) const override; virtual size_t GetPinnedUsage() const override; - virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) override; + virtual void ApplyToAllEntries( + const std::function<void(const Slice& key, void* value, size_t charge, DeleterFn deleter)>& callback, + const ApplyToAllEntriesOptions& opts) override; virtual void EraseUnRefEntries() override; virtual std::string GetPrintableOptions() const override; - int GetNumShardBits() const { return num_shard_bits_; } - - private: - static inline uint32_t HashSlice(const Slice& s) { - return static_cast<uint32_t>(GetSliceNPHash64(s)); - } + int
GetNumShardBits() const; + uint32_t GetNumShards() const; - uint32_t Shard(uint32_t hash) { - // Note, hash >> 32 yields hash in gcc, not the zero we expect! - return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0; - } + protected: + inline uint32_t Shard(uint32_t hash) { return hash & shard_mask_; } - int num_shard_bits_; + private: + const uint32_t shard_mask_; mutable port::Mutex capacity_mutex_; size_t capacity_; bool strict_capacity_limit_; diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in --- mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in 2025-05-19 16:14:27.000000000 +0000 @@ -1,3 +1,54 @@ @PACKAGE_INIT@ + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/modules") + +include(CMakeFindDependencyMacro) + +set(GFLAGS_USE_TARGET_NAMESPACE @GFLAGS_USE_TARGET_NAMESPACE@) + +if(@WITH_JEMALLOC@) + find_dependency(JeMalloc) +endif() + +if(@WITH_GFLAGS@) + find_dependency(gflags CONFIG) + if(NOT gflags_FOUND) + find_dependency(gflags) + endif() +endif() + +if(@WITH_SNAPPY@) + find_dependency(Snappy CONFIG) + if(NOT Snappy_FOUND) + find_dependency(Snappy) + endif() +endif() + +if(@WITH_ZLIB@) + find_dependency(ZLIB) +endif() + +if(@WITH_BZ2@) + find_dependency(BZip2) +endif() + +if(@WITH_LZ4@) + find_dependency(lz4) +endif() + +if(@WITH_ZSTD@) + find_dependency(zstd) +endif() + +if(@WITH_NUMA@) + find_dependency(NUMA) +endif() + +if(@WITH_TBB@) + find_dependency(TBB) +endif() + +find_dependency(Threads) + include("${CMAKE_CURRENT_LIST_DIR}/RocksDBTargets.cmake") check_required_components(RocksDB) diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake --- mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,7 @@ +macro(get_cxx_std_flags FLAGS_VARIABLE) + if( CMAKE_CXX_STANDARD_REQUIRED ) + set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_STANDARD_COMPILE_OPTION}) + else() + set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_EXTENSION_COMPILE_OPTION}) + endif() +endmacro() diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake --- mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,29 @@ +# - Find Snappy +# Find the snappy compression library and includes +# +# Snappy_INCLUDE_DIRS - where to find snappy.h, etc. +# Snappy_LIBRARIES - List of libraries when using snappy. +# Snappy_FOUND - True if snappy found. 
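
The sharded_cache.cc/.h hunks above replace the old shift-based shard selection (hash >> (32 - num_shard_bits_)) with a low-bits mask: the shard count stays a power of two, Shard(hash) becomes hash & shard_mask_, and the bit count and shard count are recovered as a popcount of the mask and shard_mask_ + 1. The new ApplyToAllEntries also stops holding one shard lock for an entire scan; it keeps a resume cursor per shard and rotates between shards, visiting only a bounded number of entries per lock hold. The following standalone toy sketches both ideas under stated assumptions (SimpleShardedMap, ApplyToSome and all other names here are invented for illustration, not RocksDB API); it needs C++20 for std::popcount.

    #include <bit>
    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <memory>
    #include <mutex>
    #include <string>

    class SimpleShardedMap {
     public:
      explicit SimpleShardedMap(int shard_bits)
          : shard_mask_((uint32_t{1} << shard_bits) - 1),
            shards_(new Shard[shard_mask_ + 1]) {}

      // Mask arithmetic as in the hunk: popcount recovers the bit count,
      // mask + 1 recovers the shard count, low bits select the shard.
      int GetNumShardBits() const { return std::popcount(shard_mask_); }
      uint32_t GetNumShards() const { return shard_mask_ + 1; }

      void Insert(uint32_t hash, const std::string& value) {
        Shard& s = shards_[hash & shard_mask_];
        std::lock_guard<std::mutex> lock(s.mu);
        s.entries[hash] = value;
      }

      // Rotate between shards, visiting ~average_entries_per_lock entries
      // per lock hold, until every cursor hits the UINT32_MAX sentinel.
      void ApplyToAllEntries(
          const std::function<void(uint32_t, const std::string&)>& callback,
          uint32_t average_entries_per_lock) {
        uint32_t num_shards = GetNumShards();
        std::unique_ptr<uint32_t[]> states(new uint32_t[num_shards]{});
        bool remaining_work;
        do {
          remaining_work = false;
          for (uint32_t s = 0; s < num_shards; s++) {
            if (states[s] != UINT32_MAX) {
              shards_[s].ApplyToSome(callback, average_entries_per_lock,
                                     &states[s]);
              remaining_work |= states[s] != UINT32_MAX;
            }
          }
        } while (remaining_work);
      }

     private:
      struct Shard {
        std::mutex mu;
        std::map<uint32_t, std::string> entries;

        // *state is the hash to resume from (toy simplification: a real
        // key equal to UINT32_MAX would collide with the sentinel).
        void ApplyToSome(
            const std::function<void(uint32_t, const std::string&)>& callback,
            uint32_t average_entries_per_lock, uint32_t* state) {
          std::lock_guard<std::mutex> lock(mu);
          auto it = entries.lower_bound(*state);
          for (uint32_t n = 0; n < average_entries_per_lock; n++) {
            if (it == entries.end()) {
              *state = UINT32_MAX;
              return;
            }
            callback(it->first, it->second);
            ++it;
          }
          *state = (it == entries.end()) ? UINT32_MAX : it->first;
        }
      };

      const uint32_t shard_mask_;
      std::unique_ptr<Shard[]> shards_;
    };

    int main() {
      SimpleShardedMap map(/*shard_bits=*/2);  // 4 shards, mask 0b11
      for (uint32_t h = 1; h <= 10; h++) map.Insert(h * 2654435761u, "v");
      int count = 0;
      map.ApplyToAllEntries([&](uint32_t, const std::string&) { count++; },
                            /*average_entries_per_lock=*/2);
      std::cout << map.GetNumShards() << " shards, " << count << " entries\n";
    }

Selecting shards by low bits also keeps Shard() branch-free even with a single shard (mask 0), which is why the hunk can drop the old num_shard_bits_ > 0 special case and its hash >> 32 caveat.
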
+ +find_path(Snappy_INCLUDE_DIRS + NAMES snappy.h + HINTS ${snappy_ROOT_DIR}/include) + +find_library(Snappy_LIBRARIES + NAMES snappy + HINTS ${snappy_ROOT_DIR}/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Snappy DEFAULT_MSG Snappy_LIBRARIES Snappy_INCLUDE_DIRS) + +mark_as_advanced( + Snappy_LIBRARIES + Snappy_INCLUDE_DIRS) + +if(Snappy_FOUND AND NOT (TARGET Snappy::snappy)) + add_library (Snappy::snappy UNKNOWN IMPORTED) + set_target_properties(Snappy::snappy + PROPERTIES + IMPORTED_LOCATION ${Snappy_LIBRARIES} + INTERFACE_INCLUDE_DIRECTORIES ${Snappy_INCLUDE_DIRS}) +endif() diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/Findgflags.cmake mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Findgflags.cmake --- mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/Findgflags.cmake 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Findgflags.cmake 2025-05-19 16:14:27.000000000 +0000 @@ -1,8 +1,8 @@ # - Find gflags library # Find the gflags includes and library # -# gflags_INCLUDE_DIR - where to find gflags.h. -# gflags_LIBRARIES - List of libraries when using gflags. +# GFLAGS_INCLUDE_DIR - where to find gflags.h. +# GFLAGS_LIBRARIES - List of libraries when using gflags. # gflags_FOUND - True if gflags found. find_path(GFLAGS_INCLUDE_DIR diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/Findsnappy.cmake mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Findsnappy.cmake --- mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/Findsnappy.cmake 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Findsnappy.cmake 1970-01-01 00:00:00.000000000 +0000 @@ -1,29 +0,0 @@ -# - Find Snappy -# Find the snappy compression library and includes -# -# snappy_INCLUDE_DIRS - where to find snappy.h, etc. -# snappy_LIBRARIES - List of libraries when using snappy. -# snappy_FOUND - True if snappy found. - -find_path(snappy_INCLUDE_DIRS - NAMES snappy.h - HINTS ${snappy_ROOT_DIR}/include) - -find_library(snappy_LIBRARIES - NAMES snappy - HINTS ${snappy_ROOT_DIR}/lib) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(snappy DEFAULT_MSG snappy_LIBRARIES snappy_INCLUDE_DIRS) - -mark_as_advanced( - snappy_LIBRARIES - snappy_INCLUDE_DIRS) - -if(snappy_FOUND AND NOT (TARGET snappy::snappy)) - add_library (snappy::snappy UNKNOWN IMPORTED) - set_target_properties(snappy::snappy - PROPERTIES - IMPORTED_LOCATION ${snappy_LIBRARIES} - INTERFACE_INCLUDE_DIRECTORIES ${snappy_INCLUDE_DIRS}) -endif() diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/Finduring.cmake mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Finduring.cmake --- mariadb-10.11.11/storage/rocksdb/rocksdb/cmake/modules/Finduring.cmake 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/cmake/modules/Finduring.cmake 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,26 @@ +# - Find liburing +# +# uring_INCLUDE_DIR - Where to find liburing.h +# uring_LIBRARIES - List of libraries when using uring. +# uring_FOUND - True if uring found. 
+ +find_path(uring_INCLUDE_DIR + NAMES liburing.h) +find_library(uring_LIBRARIES + NAMES liburing.a liburing) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(uring + DEFAULT_MSG uring_LIBRARIES uring_INCLUDE_DIR) + +mark_as_advanced( + uring_INCLUDE_DIR + uring_LIBRARIES) + +if(uring_FOUND AND NOT TARGET uring::uring) + add_library(uring::uring UNKNOWN IMPORTED) + set_target_properties(uring::uring PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${uring_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${uring_LIBRARIES}") +endif() diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/coverage/coverage_test.sh mariadb-10.11.13/storage/rocksdb/rocksdb/coverage/coverage_test.sh --- mariadb-10.11.11/storage/rocksdb/rocksdb/coverage/coverage_test.sh 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/coverage/coverage_test.sh 2025-05-19 16:14:27.000000000 +0000 @@ -12,21 +12,24 @@ ROOT=".." # Fetch right version of gcov if [ -d /mnt/gvfs/third-party -a -z "$CXX" ]; then - source $ROOT/build_tools/fbcode_config.sh + source $ROOT/build_tools/fbcode_config_platform007.sh GCOV=$GCC_BASE/bin/gcov else GCOV=$(which gcov) fi +echo -e "Using $GCOV" COVERAGE_DIR="$PWD/COVERAGE_REPORT" mkdir -p $COVERAGE_DIR # Find all gcno files to generate the coverage report +PYTHON=${1:-`which python3`} +echo -e "Using $PYTHON" GCNO_FILES=`find $ROOT -name "*.gcno"` $GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null | # Parse the raw gcov report to more human readable form. - python $ROOT/coverage/parse_gcov_output.py | + $PYTHON $ROOT/coverage/parse_gcov_output.py | # Write the output to both stdout and report file. tee $COVERAGE_DIR/coverage_report_all.txt && echo -e "Generated coverage report for all files: $COVERAGE_DIR/coverage_report_all.txt\n" @@ -41,7 +44,7 @@ echo -e "Recently updated files: $LATEST_FILES\n" > $RECENT_REPORT $GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null | - python $ROOT/coverage/parse_gcov_output.py -interested-files $LATEST_FILES | + $PYTHON $ROOT/coverage/parse_gcov_output.py -interested-files $LATEST_FILES | tee -a $RECENT_REPORT && echo -e "Generated coverage report for recently updated files: $RECENT_REPORT\n" diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/coverage/parse_gcov_output.py mariadb-10.11.13/storage/rocksdb/rocksdb/coverage/parse_gcov_output.py --- mariadb-10.11.11/storage/rocksdb/rocksdb/coverage/parse_gcov_output.py 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/coverage/parse_gcov_output.py 2025-05-19 16:14:27.000000000 +0000 @@ -1,10 +1,12 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +from __future__ import print_function + +import optparse import re import sys -from optparse import OptionParser - # the gcov report follows certain pattern. Each file will have two lines # of report, from which we can extract the file name, total lines and coverage # percentage. @@ -48,7 +50,7 @@ def get_option_parser(): usage = "Parse the gcov output and generate more human-readable code " +\ "coverage report." 
- parser = OptionParser(usage) + parser = optparse.OptionParser(usage) parser.add_option( "--interested-files", "-i", @@ -73,8 +75,8 @@ header_template = \ "%" + str(max_file_name_length) + "s\t%s\t%s" separator = "-" * (max_file_name_length + 10 + 20) - print header_template % ("Filename", "Coverage", "Lines") # noqa: E999 T25377293 Grandfathered in - print separator + print(header_template % ("Filename", "Coverage", "Lines")) # noqa: E999 T25377293 Grandfathered in + print(separator) # -- Print body # template for printing coverage report for each file. @@ -82,12 +84,12 @@ for fname, coverage_info in per_file_coverage.items(): coverage, lines = coverage_info - print record_template % (fname, coverage, lines) + print(record_template % (fname, coverage, lines)) # -- Print footer if total_coverage: - print separator - print record_template % ("Total", total_coverage[0], total_coverage[1]) + print(separator) + print(record_template % ("Total", total_coverage[0], total_coverage[1])) def report_coverage(): parser = get_option_parser() @@ -111,7 +113,7 @@ total_coverage = None if not len(per_file_coverage): - print >> sys.stderr, "Cannot find coverage info for the given files." + print("Cannot find coverage info for the given files.", file=sys.stderr) return display_file_coverage(per_file_coverage, total_coverage) diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc 2025-05-19 16:14:27.000000000 +0000 @@ -30,21 +30,20 @@ return db_iter_->GetProperty(prop_name, prop); } -void ArenaWrappedDBIter::Init(Env* env, const ReadOptions& read_options, - const ImmutableCFOptions& cf_options, - const MutableCFOptions& mutable_cf_options, - const SequenceNumber& sequence, - uint64_t max_sequential_skip_in_iteration, - uint64_t version_number, - ReadCallback* read_callback, DBImpl* db_impl, - ColumnFamilyData* cfd, bool allow_blob, - bool allow_refresh) { +void ArenaWrappedDBIter::Init( + Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Version* version, + const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iteration, + uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, + ColumnFamilyData* cfd, bool expose_blob_index, bool allow_refresh) { auto mem = arena_.AllocateAligned(sizeof(DBIter)); - db_iter_ = new (mem) DBIter(env, read_options, cf_options, mutable_cf_options, - cf_options.user_comparator, nullptr, sequence, - true, max_sequential_skip_in_iteration, - read_callback, db_impl, cfd, allow_blob); + db_iter_ = + new (mem) DBIter(env, read_options, ioptions, mutable_cf_options, + ioptions.user_comparator, /* iter */ nullptr, version, + sequence, true, max_sequential_skip_in_iteration, + read_callback, db_impl, cfd, expose_blob_index); sv_number_ = version_number; + read_options_ = read_options; allow_refresh_ = allow_refresh; } @@ -56,48 +55,74 @@ // TODO(yiwu): For last_seq_same_as_publish_seq_==false, this is not the // correct behavior. Will be corrected automatically when we take a snapshot // here for the case of WritePreparedTxnDB. 
- SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); - if (sv_number_ != cur_sv_number) { - Env* env = db_iter_->env(); - db_iter_->~DBIter(); - arena_.~Arena(); - new (&arena_) Arena(); - - SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); - if (read_callback_) { - read_callback_->Refresh(latest_seq); + TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1"); + TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2"); + while (true) { + if (sv_number_ != cur_sv_number) { + Env* env = db_iter_->env(); + db_iter_->~DBIter(); + arena_.~Arena(); + new (&arena_) Arena(); + + SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); + SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + if (read_callback_) { + read_callback_->Refresh(latest_seq); + } + Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, + sv->current, latest_seq, + sv->mutable_cf_options.max_sequential_skip_in_iterations, + cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_, + allow_refresh_); + + InternalIterator* internal_iter = db_impl_->NewInternalIterator( + read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(), + latest_seq, /* allow_unprepared_value */ true); + SetIterUnderDBIter(internal_iter); + break; + } else { + SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + // Refresh range-tombstones in MemTable + if (!read_options_.ignore_range_deletions) { + SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_); + ReadRangeDelAggregator* range_del_agg = + db_iter_->GetRangeDelAggregator(); + std::unique_ptr range_del_iter; + range_del_iter.reset( + sv->mem->NewRangeTombstoneIterator(read_options_, latest_seq)); + range_del_agg->AddTombstones(std::move(range_del_iter)); + cfd_->ReturnThreadLocalSuperVersion(sv); + } + // Refresh latest sequence number + db_iter_->set_sequence(latest_seq); + db_iter_->set_valid(false); + // Check again if the latest super version number is changed + uint64_t latest_sv_number = cfd_->GetSuperVersionNumber(); + if (latest_sv_number != cur_sv_number) { + // If the super version number is changed after refreshing, + // fallback to Re-Init the InternalIterator + cur_sv_number = latest_sv_number; + continue; + } + break; } - Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, - latest_seq, sv->mutable_cf_options.max_sequential_skip_in_iterations, - cur_sv_number, read_callback_, db_impl_, cfd_, allow_blob_, - allow_refresh_); - - InternalIterator* internal_iter = db_impl_->NewInternalIterator( - read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(), - latest_seq); - SetIterUnderDBIter(internal_iter); - } else { - db_iter_->set_sequence(latest_seq); - db_iter_->set_valid(false); } return Status::OK(); } ArenaWrappedDBIter* NewArenaWrappedDbIterator( - Env* env, const ReadOptions& read_options, - const ImmutableCFOptions& cf_options, - const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence, - uint64_t max_sequential_skip_in_iterations, uint64_t version_number, - ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, - bool allow_blob, bool allow_refresh) { + Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Version* version, + const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, + uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, + ColumnFamilyData* cfd, bool 
expose_blob_index, bool allow_refresh) { ArenaWrappedDBIter* iter = new ArenaWrappedDBIter(); - iter->Init(env, read_options, cf_options, mutable_cf_options, sequence, + iter->Init(env, read_options, ioptions, mutable_cf_options, version, sequence, max_sequential_skip_in_iterations, version_number, read_callback, - db_impl, cfd, allow_blob, allow_refresh); + db_impl, cfd, expose_blob_index, allow_refresh); if (db_impl != nullptr && cfd != nullptr && allow_refresh) { - iter->StoreRefreshInfo(read_options, db_impl, cfd, read_callback, - allow_blob); + iter->StoreRefreshInfo(db_impl, cfd, read_callback, expose_blob_index); } return iter; diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.h 2025-05-19 16:14:27.000000000 +0000 @@ -12,7 +12,6 @@ #include #include "db/db_impl/db_impl.h" #include "db/db_iter.h" -#include "db/dbformat.h" #include "db/range_del_aggregator.h" #include "memory/arena.h" #include "options/cf_options.h" @@ -23,6 +22,7 @@ namespace ROCKSDB_NAMESPACE { class Arena; +class Version; // A wrapper iterator which wraps DB Iterator and the arena, with which the DB // iterator is supposed to be allocated. This class is used as an entry point of @@ -33,7 +33,13 @@ // the same as the inner DBIter. class ArenaWrappedDBIter : public Iterator { public: - virtual ~ArenaWrappedDBIter() { db_iter_->~DBIter(); } + ~ArenaWrappedDBIter() override { + if (db_iter_ != nullptr) { + db_iter_->~DBIter(); + } else { + assert(false); + } + } // Get the arena to be used to allocate memory for DBIter to be wrapped, // as well as child iterators in it. @@ -41,6 +47,7 @@ virtual ReadRangeDelAggregator* GetRangeDelAggregator() { return db_iter_->GetRangeDelAggregator(); } + const ReadOptions& GetReadOptions() { return read_options_; } // Set the internal iterator wrapped inside the DB Iterator. Usually it is // a merging iterator. @@ -51,6 +58,8 @@ bool Valid() const override { return db_iter_->Valid(); } void SeekToFirst() override { db_iter_->SeekToFirst(); } void SeekToLast() override { db_iter_->SeekToLast(); } + // 'target' does not contain timestamp, even if user timestamp feature is + // enabled. 
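
The reworked ArenaWrappedDBIter::Refresh() above is an optimistic retry loop: snapshot the super-version number, rebuild everything if it differs from the one the iterator was built against, otherwise take the cheap path (re-pull memtable range tombstones and the latest sequence number), then re-read the number and loop back to the rebuild path if it moved while the cheap path ran. A minimal sketch of that control flow, using hypothetical stand-ins (VersionedSource and CachedReader are not RocksDB types):

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-in for a source whose "super version" can change
    // concurrently (flush/compaction in the real code).
    struct VersionedSource {
      std::atomic<uint64_t> version{1};
    };

    struct CachedReader {
      uint64_t seen_version = 0;

      void FullRebuild(uint64_t v) { seen_version = v; /* rebuild state */ }
      void CheapRefresh() { /* refresh sequence number only */ }

      // Same shape as ArenaWrappedDBIter::Refresh(): retry until the cheap
      // path completes without the version moving underneath it.
      void Refresh(VersionedSource& src) {
        uint64_t cur = src.version.load();
        while (true) {
          if (seen_version != cur) {
            FullRebuild(cur);
            break;
          }
          CheapRefresh();
          uint64_t latest = src.version.load();
          if (latest != cur) {
            // Version moved during the cheap path; fall back to rebuild.
            cur = latest;
            continue;
          }
          break;
        }
      }
    };

    int main() {
      VersionedSource src;
      CachedReader reader;
      reader.Refresh(src);   // first call: full rebuild
      reader.Refresh(src);   // unchanged: cheap path only
      src.version.store(2);  // concurrent writer bumps the version
      reader.Refresh(src);   // detects the change: rebuilds
      std::cout << "reader at version " << reader.seen_version << "\n";
    }
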
void Seek(const Slice& target) override { db_iter_->Seek(target); } void SeekForPrev(const Slice& target) override { db_iter_->SeekForPrev(target); @@ -60,6 +69,7 @@ Slice key() const override { return db_iter_->key(); } Slice value() const override { return db_iter_->value(); } Status status() const override { return db_iter_->status(); } + Slice timestamp() const override { return db_iter_->timestamp(); } bool IsBlob() const { return db_iter_->IsBlob(); } Status GetProperty(std::string prop_name, std::string* prop) override; @@ -67,34 +77,32 @@ Status Refresh() override; void Init(Env* env, const ReadOptions& read_options, - const ImmutableCFOptions& cf_options, - const MutableCFOptions& mutable_cf_options, + const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Version* version, const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd, - bool allow_blob, bool allow_refresh); + bool expose_blob_index, bool allow_refresh); // Store some parameters so we can refresh the iterator at a later point // with these same params - void StoreRefreshInfo(const ReadOptions& read_options, DBImpl* db_impl, - ColumnFamilyData* cfd, ReadCallback* read_callback, - bool allow_blob) { - read_options_ = read_options; + void StoreRefreshInfo(DBImpl* db_impl, ColumnFamilyData* cfd, + ReadCallback* read_callback, bool expose_blob_index) { db_impl_ = db_impl; cfd_ = cfd; read_callback_ = read_callback; - allow_blob_ = allow_blob; + expose_blob_index_ = expose_blob_index; } private: - DBIter* db_iter_; + DBIter* db_iter_ = nullptr; Arena arena_; uint64_t sv_number_; ColumnFamilyData* cfd_ = nullptr; DBImpl* db_impl_ = nullptr; ReadOptions read_options_; ReadCallback* read_callback_; - bool allow_blob_ = false; + bool expose_blob_index_ = false; bool allow_refresh_ = true; }; @@ -102,11 +110,10 @@ // `db_impl` and `cfd` are used for reneweal. If left null, renewal will not // be supported. extern ArenaWrappedDBIter* NewArenaWrappedDbIterator( - Env* env, const ReadOptions& read_options, - const ImmutableCFOptions& cf_options, - const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence, - uint64_t max_sequential_skip_in_iterations, uint64_t version_number, - ReadCallback* read_callback, DBImpl* db_impl = nullptr, - ColumnFamilyData* cfd = nullptr, bool allow_blob = false, - bool allow_refresh = true); + Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, + const MutableCFOptions& mutable_cf_options, const Version* version, + const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations, + uint64_t version_number, ReadCallback* read_callback, + DBImpl* db_impl = nullptr, ColumnFamilyData* cfd = nullptr, + bool expose_blob_index = false, bool allow_refresh = true); } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_constants.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_constants.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_constants.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_constants.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,16 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
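
A detail worth pausing on in the hunks above: the DBIter lives inside the arena (new (mem) DBIter(...)), so ArenaWrappedDBIter must invoke its destructor explicitly (db_iter_->~DBIter()), and Refresh() even re-constructs the Arena object itself in place before reusing it. A self-contained illustration of that placement-new lifecycle, with invented names (MiniArena, Widget), not the RocksDB classes:

    #include <cstddef>
    #include <iostream>
    #include <new>
    #include <string>

    // Minimal fixed-size arena, just enough to mirror the pattern.
    class MiniArena {
     public:
      char* AllocateAligned(size_t n) {
        size_t aligned = (used_ + alignof(std::max_align_t) - 1) &
                         ~(alignof(std::max_align_t) - 1);
        if (aligned + n > sizeof(buf_)) return nullptr;
        used_ = aligned + n;
        return buf_ + aligned;
      }

     private:
      alignas(std::max_align_t) char buf_[1024];
      size_t used_ = 0;
    };

    struct Widget {
      explicit Widget(std::string name) : name_(std::move(name)) {}
      ~Widget() { std::cout << "destroying " << name_ << "\n"; }
      std::string name_;
    };

    int main() {
      MiniArena arena;
      // Like ArenaWrappedDBIter::Init(): carve memory from the arena,
      // then construct the object in place.
      void* mem = arena.AllocateAligned(sizeof(Widget));
      Widget* w = new (mem) Widget("arena-owned");

      // Like the destructor/Refresh(): the arena never calls delete, so
      // the destructor must be invoked explicitly before memory is reused.
      w->~Widget();

      // Refresh() re-constructs the arena itself the same way:
      arena.~MiniArena();
      new (&arena) MiniArena();
    }
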
+ +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +constexpr uint64_t kInvalidBlobFileNumber = 0; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,146 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "db/blob/blob_garbage_meter.h" +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/status.h" +#include "table/internal_iterator.h" +#include "test_util/sync_point.h" + +namespace ROCKSDB_NAMESPACE { + +// An internal iterator that passes each key-value encountered to +// BlobGarbageMeter as inflow in order to measure the total number and size of +// blobs in the compaction input on a per-blob file basis. +class BlobCountingIterator : public InternalIterator { + public: + BlobCountingIterator(InternalIterator* iter, + BlobGarbageMeter* blob_garbage_meter) + : iter_(iter), blob_garbage_meter_(blob_garbage_meter) { + assert(iter_); + assert(blob_garbage_meter_); + + UpdateAndCountBlobIfNeeded(); + } + + bool Valid() const override { return iter_->Valid() && status_.ok(); } + + void SeekToFirst() override { + iter_->SeekToFirst(); + UpdateAndCountBlobIfNeeded(); + } + + void SeekToLast() override { + iter_->SeekToLast(); + UpdateAndCountBlobIfNeeded(); + } + + void Seek(const Slice& target) override { + iter_->Seek(target); + UpdateAndCountBlobIfNeeded(); + } + + void SeekForPrev(const Slice& target) override { + iter_->SeekForPrev(target); + UpdateAndCountBlobIfNeeded(); + } + + void Next() override { + assert(Valid()); + + iter_->Next(); + UpdateAndCountBlobIfNeeded(); + } + + bool NextAndGetResult(IterateResult* result) override { + assert(Valid()); + + const bool res = iter_->NextAndGetResult(result); + UpdateAndCountBlobIfNeeded(); + return res; + } + + void Prev() override { + assert(Valid()); + + iter_->Prev(); + UpdateAndCountBlobIfNeeded(); + } + + Slice key() const override { + assert(Valid()); + return iter_->key(); + } + + Slice user_key() const override { + assert(Valid()); + return iter_->user_key(); + } + + Slice value() const override { + assert(Valid()); + return iter_->value(); + } + + Status status() const override { return status_; } + + bool PrepareValue() override { + assert(Valid()); + return iter_->PrepareValue(); + } + + bool MayBeOutOfLowerBound() override { + assert(Valid()); + return iter_->MayBeOutOfLowerBound(); + } + + IterBoundCheck UpperBoundCheckResult() override { + assert(Valid()); + return iter_->UpperBoundCheckResult(); + } + + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + iter_->SetPinnedItersMgr(pinned_iters_mgr); + } + + bool IsKeyPinned() const override { + assert(Valid()); + return iter_->IsKeyPinned(); + } + + bool IsValuePinned() const override { + assert(Valid()); + return iter_->IsValuePinned(); + } + + Status GetProperty(std::string prop_name, std::string* prop) override { + return iter_->GetProperty(prop_name, prop); + } + + 
private: + void UpdateAndCountBlobIfNeeded() { + assert(!iter_->Valid() || iter_->status().ok()); + + if (!iter_->Valid()) { + status_ = iter_->status(); + return; + } + + TEST_SYNC_POINT( + "BlobCountingIterator::UpdateAndCountBlobIfNeeded:ProcessInFlow"); + + status_ = blob_garbage_meter_->ProcessInFlow(key(), value()); + } + + InternalIterator* iter_; + BlobGarbageMeter* blob_garbage_meter_; + Status status_; +}; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,326 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_counting_iterator.h" + +#include +#include + +#include "db/blob/blob_garbage_meter.h" +#include "db/blob/blob_index.h" +#include "db/blob/blob_log_format.h" +#include "db/dbformat.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/vector_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +void CheckInFlow(const BlobGarbageMeter& blob_garbage_meter, + uint64_t blob_file_number, uint64_t count, uint64_t bytes) { + const auto& flows = blob_garbage_meter.flows(); + + const auto it = flows.find(blob_file_number); + if (it == flows.end()) { + ASSERT_EQ(count, 0); + ASSERT_EQ(bytes, 0); + return; + } + + const auto& in = it->second.GetInFlow(); + + ASSERT_EQ(in.GetCount(), count); + ASSERT_EQ(in.GetBytes(), bytes); +} + +TEST(BlobCountingIteratorTest, CountBlobs) { + // Note: the input consists of three key-values: two are blob references to + // different blob files, while the third one is a plain value. 
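
BlobCountingIterator above is a pure forwarding wrapper: every repositioning call is delegated to the inner InternalIterator, and after each move the current key-value is passed to the BlobGarbageMeter as in-flow. One consequence, which the test below verifies by expecting counts of 1x, 2x, 3x... across successive passes, is that the meter accumulates per visit, so re-seeking the same key counts it again. A stripped-down sketch of the same decorator shape (VecIter and CountingIter are invented names, not RocksDB API):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Any iterator-ish source; a vector of key/value pairs suffices here.
    class VecIter {
     public:
      explicit VecIter(std::vector<std::pair<std::string, std::string>> kvs)
          : kvs_(std::move(kvs)) {}
      bool Valid() const { return pos_ < kvs_.size(); }
      void SeekToFirst() { pos_ = 0; }
      void Next() { ++pos_; }
      const std::string& key() const { return kvs_[pos_].first; }
      const std::string& value() const { return kvs_[pos_].second; }

     private:
      std::vector<std::pair<std::string, std::string>> kvs_;
      size_t pos_ = 0;
    };

    // Decorator in the style of BlobCountingIterator: forward everything,
    // observe after every repositioning. The "meter" here just tallies.
    class CountingIter {
     public:
      explicit CountingIter(VecIter* iter) : iter_(iter) { Observe(); }

      bool Valid() const { return iter_->Valid(); }
      void SeekToFirst() { iter_->SeekToFirst(); Observe(); }
      void Next() { iter_->Next(); Observe(); }
      const std::string& key() const { return iter_->key(); }
      const std::string& value() const { return iter_->value(); }

      size_t visits = 0, bytes = 0;

     private:
      void Observe() {
        if (!iter_->Valid()) return;
        ++visits;  // counts every visit, like the meter's in-flow
        bytes += iter_->key().size() + iter_->value().size();
      }
      VecIter* iter_;
    };

    int main() {
      VecIter in({{"key0", "blob-ref-a"}, {"key1", "blob-ref-b"}});
      CountingIter it(&in);
      for (it.SeekToFirst(); it.Valid(); it.Next()) {}
      for (it.SeekToFirst(); it.Valid(); it.Next()) {}  // re-counts
      std::cout << it.visits << " visits, " << it.bytes << " bytes\n";
    }
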
+ constexpr char user_key0[] = "key0"; + constexpr char user_key1[] = "key1"; + constexpr char user_key2[] = "key2"; + + const std::vector keys{ + test::KeyStr(user_key0, 1, kTypeBlobIndex), + test::KeyStr(user_key1, 2, kTypeBlobIndex), + test::KeyStr(user_key2, 3, kTypeValue)}; + + constexpr uint64_t first_blob_file_number = 4; + constexpr uint64_t first_offset = 1000; + constexpr uint64_t first_size = 2000; + + std::string first_blob_index; + BlobIndex::EncodeBlob(&first_blob_index, first_blob_file_number, first_offset, + first_size, kNoCompression); + + constexpr uint64_t second_blob_file_number = 6; + constexpr uint64_t second_offset = 2000; + constexpr uint64_t second_size = 4000; + + std::string second_blob_index; + BlobIndex::EncodeBlob(&second_blob_index, second_blob_file_number, + second_offset, second_size, kNoCompression); + + const std::vector values{first_blob_index, second_blob_index, + "raw_value"}; + + assert(keys.size() == values.size()); + + VectorIterator input(keys, values); + BlobGarbageMeter blob_garbage_meter; + + BlobCountingIterator blob_counter(&input, &blob_garbage_meter); + + constexpr uint64_t first_expected_bytes = + first_size + + BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(user_key0) - 1); + constexpr uint64_t second_expected_bytes = + second_size + + BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(user_key1) - 1); + + // Call SeekToFirst and iterate forward + blob_counter.SeekToFirst(); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[0]); + ASSERT_EQ(blob_counter.user_key(), user_key0); + ASSERT_EQ(blob_counter.value(), values[0]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 1, + first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 0, 0); + + blob_counter.Next(); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[1]); + ASSERT_EQ(blob_counter.user_key(), user_key1); + ASSERT_EQ(blob_counter.value(), values[1]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 1, + first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 1, + second_expected_bytes); + + blob_counter.Next(); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[2]); + ASSERT_EQ(blob_counter.user_key(), user_key2); + ASSERT_EQ(blob_counter.value(), values[2]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 1, + first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 1, + second_expected_bytes); + + blob_counter.Next(); + ASSERT_FALSE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 1, + first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 1, + second_expected_bytes); + + // Do it again using NextAndGetResult + blob_counter.SeekToFirst(); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[0]); + ASSERT_EQ(blob_counter.user_key(), user_key0); + ASSERT_EQ(blob_counter.value(), values[0]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 2, + 2 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 1, + second_expected_bytes); + + { + IterateResult result; + ASSERT_TRUE(blob_counter.NextAndGetResult(&result)); + ASSERT_EQ(result.key, keys[1]); + ASSERT_EQ(blob_counter.user_key(), user_key1); + 
ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[1]); + ASSERT_EQ(blob_counter.value(), values[1]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 2, + 2 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 2, + 2 * second_expected_bytes); + } + + { + IterateResult result; + ASSERT_TRUE(blob_counter.NextAndGetResult(&result)); + ASSERT_EQ(result.key, keys[2]); + ASSERT_EQ(blob_counter.user_key(), user_key2); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[2]); + ASSERT_EQ(blob_counter.value(), values[2]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 2, + 2 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 2, + 2 * second_expected_bytes); + } + + { + IterateResult result; + ASSERT_FALSE(blob_counter.NextAndGetResult(&result)); + ASSERT_FALSE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 2, + 2 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 2, + 2 * second_expected_bytes); + } + + // Call SeekToLast and iterate backward + blob_counter.SeekToLast(); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[2]); + ASSERT_EQ(blob_counter.user_key(), user_key2); + ASSERT_EQ(blob_counter.value(), values[2]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 2, + 2 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 2, + 2 * second_expected_bytes); + + blob_counter.Prev(); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[1]); + ASSERT_EQ(blob_counter.user_key(), user_key1); + ASSERT_EQ(blob_counter.value(), values[1]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 2, + 2 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 3, + 3 * second_expected_bytes); + + blob_counter.Prev(); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[0]); + ASSERT_EQ(blob_counter.user_key(), user_key0); + ASSERT_EQ(blob_counter.value(), values[0]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 3, + 3 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 3, + 3 * second_expected_bytes); + + blob_counter.Prev(); + ASSERT_FALSE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 3, + 3 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 3, + 3 * second_expected_bytes); + + // Call Seek for all keys (plus one that's greater than all of them) + blob_counter.Seek(keys[0]); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[0]); + ASSERT_EQ(blob_counter.user_key(), user_key0); + ASSERT_EQ(blob_counter.value(), values[0]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 4, + 4 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 3, + 3 * second_expected_bytes); + + blob_counter.Seek(keys[1]); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[1]); + ASSERT_EQ(blob_counter.user_key(), user_key1); + ASSERT_EQ(blob_counter.value(), values[1]); + 
CheckInFlow(blob_garbage_meter, first_blob_file_number, 4, + 4 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 4, + 4 * second_expected_bytes); + + blob_counter.Seek(keys[2]); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[2]); + ASSERT_EQ(blob_counter.user_key(), user_key2); + ASSERT_EQ(blob_counter.value(), values[2]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 4, + 4 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 4, + 4 * second_expected_bytes); + + blob_counter.Seek("zzz"); + ASSERT_FALSE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 4, + 4 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 4, + 4 * second_expected_bytes); + + // Call SeekForPrev for all keys (plus one that's less than all of them) + blob_counter.SeekForPrev("aaa"); + ASSERT_FALSE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 4, + 4 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 4, + 4 * second_expected_bytes); + + blob_counter.SeekForPrev(keys[0]); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[0]); + ASSERT_EQ(blob_counter.user_key(), user_key0); + ASSERT_EQ(blob_counter.value(), values[0]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 5, + 5 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 4, + 4 * second_expected_bytes); + + blob_counter.SeekForPrev(keys[1]); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[1]); + ASSERT_EQ(blob_counter.user_key(), user_key1); + ASSERT_EQ(blob_counter.value(), values[1]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 5, + 5 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 5, + 5 * second_expected_bytes); + + blob_counter.SeekForPrev(keys[2]); + ASSERT_TRUE(blob_counter.Valid()); + ASSERT_OK(blob_counter.status()); + ASSERT_EQ(blob_counter.key(), keys[2]); + ASSERT_EQ(blob_counter.user_key(), user_key2); + ASSERT_EQ(blob_counter.value(), values[2]); + CheckInFlow(blob_garbage_meter, first_blob_file_number, 5, + 5 * first_expected_bytes); + CheckInFlow(blob_garbage_meter, second_blob_file_number, 5, + 5 * second_expected_bytes); +} + +TEST(BlobCountingIteratorTest, CorruptBlobIndex) { + const std::vector keys{ + test::KeyStr("user_key", 1, kTypeBlobIndex)}; + const std::vector values{"i_am_not_a_blob_index"}; + + assert(keys.size() == values.size()); + + VectorIterator input(keys, values); + BlobGarbageMeter blob_garbage_meter; + + BlobCountingIterator blob_counter(&input, &blob_garbage_meter); + + blob_counter.SeekToFirst(); + ASSERT_FALSE(blob_counter.Valid()); + ASSERT_NOK(blob_counter.status()); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_fetcher.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_fetcher.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_fetcher.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_fetcher.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,34 @@ +// 
Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_fetcher.h" + +#include "db/version_set.h" + +namespace ROCKSDB_NAMESPACE { + +Status BlobFetcher::FetchBlob(const Slice& user_key, + const Slice& blob_index_slice, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, + uint64_t* bytes_read) const { + assert(version_); + + return version_->GetBlob(read_options_, user_key, blob_index_slice, + prefetch_buffer, blob_value, bytes_read); +} + +Status BlobFetcher::FetchBlob(const Slice& user_key, + const BlobIndex& blob_index, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, + uint64_t* bytes_read) const { + assert(version_); + + return version_->GetBlob(read_options_, user_key, blob_index, prefetch_buffer, + blob_value, bytes_read); +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_fetcher.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_fetcher.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_fetcher.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_fetcher.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,37 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/options.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +class Version; +class Slice; +class FilePrefetchBuffer; +class PinnableSlice; +class BlobIndex; + +// A thin wrapper around the blob retrieval functionality of Version. +class BlobFetcher { + public: + BlobFetcher(const Version* version, const ReadOptions& read_options) + : version_(version), read_options_(read_options) {} + + Status FetchBlob(const Slice& user_key, const Slice& blob_index_slice, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, uint64_t* bytes_read) const; + + Status FetchBlob(const Slice& user_key, const BlobIndex& blob_index, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* blob_value, uint64_t* bytes_read) const; + + private: + const Version* version_; + ReadOptions read_options_; +}; +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_addition.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_addition.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,156 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_file_addition.h" + +#include +#include + +#include "logging/event_logger.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "test_util/sync_point.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +// Tags for custom fields. 
Note that these get persisted in the manifest, +// so existing tags should not be modified. +enum BlobFileAddition::CustomFieldTags : uint32_t { + kEndMarker, + + // Add forward compatible fields here + + ///////////////////////////////////////////////////////////////////// + + kForwardIncompatibleMask = 1 << 6, + + // Add forward incompatible fields here +}; + +void BlobFileAddition::EncodeTo(std::string* output) const { + PutVarint64(output, blob_file_number_); + PutVarint64(output, total_blob_count_); + PutVarint64(output, total_blob_bytes_); + PutLengthPrefixedSlice(output, checksum_method_); + PutLengthPrefixedSlice(output, checksum_value_); + + // Encode any custom fields here. The format to use is a Varint32 tag (see + // CustomFieldTags above) followed by a length prefixed slice. Unknown custom + // fields will be ignored during decoding unless they're in the forward + // incompatible range. + + TEST_SYNC_POINT_CALLBACK("BlobFileAddition::EncodeTo::CustomFields", output); + + PutVarint32(output, kEndMarker); +} + +Status BlobFileAddition::DecodeFrom(Slice* input) { + constexpr char class_name[] = "BlobFileAddition"; + + if (!GetVarint64(input, &blob_file_number_)) { + return Status::Corruption(class_name, "Error decoding blob file number"); + } + + if (!GetVarint64(input, &total_blob_count_)) { + return Status::Corruption(class_name, "Error decoding total blob count"); + } + + if (!GetVarint64(input, &total_blob_bytes_)) { + return Status::Corruption(class_name, "Error decoding total blob bytes"); + } + + Slice checksum_method; + if (!GetLengthPrefixedSlice(input, &checksum_method)) { + return Status::Corruption(class_name, "Error decoding checksum method"); + } + checksum_method_ = checksum_method.ToString(); + + Slice checksum_value; + if (!GetLengthPrefixedSlice(input, &checksum_value)) { + return Status::Corruption(class_name, "Error decoding checksum value"); + } + checksum_value_ = checksum_value.ToString(); + + while (true) { + uint32_t custom_field_tag = 0; + if (!GetVarint32(input, &custom_field_tag)) { + return Status::Corruption(class_name, "Error decoding custom field tag"); + } + + if (custom_field_tag == kEndMarker) { + break; + } + + if (custom_field_tag & kForwardIncompatibleMask) { + return Status::Corruption( + class_name, "Forward incompatible custom field encountered"); + } + + Slice custom_field_value; + if (!GetLengthPrefixedSlice(input, &custom_field_value)) { + return Status::Corruption(class_name, + "Error decoding custom field value"); + } + } + + return Status::OK(); +} + +std::string BlobFileAddition::DebugString() const { + std::ostringstream oss; + + oss << *this; + + return oss.str(); +} + +std::string BlobFileAddition::DebugJSON() const { + JSONWriter jw; + + jw << *this; + + jw.EndObject(); + + return jw.Get(); +} + +bool operator==(const BlobFileAddition& lhs, const BlobFileAddition& rhs) { + return lhs.GetBlobFileNumber() == rhs.GetBlobFileNumber() && + lhs.GetTotalBlobCount() == rhs.GetTotalBlobCount() && + lhs.GetTotalBlobBytes() == rhs.GetTotalBlobBytes() && + lhs.GetChecksumMethod() == rhs.GetChecksumMethod() && + lhs.GetChecksumValue() == rhs.GetChecksumValue(); +} + +bool operator!=(const BlobFileAddition& lhs, const BlobFileAddition& rhs) { + return !(lhs == rhs); +} + +std::ostream& operator<<(std::ostream& os, + const BlobFileAddition& blob_file_addition) { + os << "blob_file_number: " << blob_file_addition.GetBlobFileNumber() + << " total_blob_count: " << blob_file_addition.GetTotalBlobCount() + << " total_blob_bytes: " << 
blob_file_addition.GetTotalBlobBytes() + << " checksum_method: " << blob_file_addition.GetChecksumMethod() + << " checksum_value: " + << Slice(blob_file_addition.GetChecksumValue()).ToString(/* hex */ true); + + return os; +} + +JSONWriter& operator<<(JSONWriter& jw, + const BlobFileAddition& blob_file_addition) { + jw << "BlobFileNumber" << blob_file_addition.GetBlobFileNumber() + << "TotalBlobCount" << blob_file_addition.GetTotalBlobCount() + << "TotalBlobBytes" << blob_file_addition.GetTotalBlobBytes() + << "ChecksumMethod" << blob_file_addition.GetChecksumMethod() + << "ChecksumValue" + << Slice(blob_file_addition.GetChecksumValue()).ToString(/* hex */ true); + + return jw; +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_addition.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_addition.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,67 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "db/blob/blob_constants.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class JSONWriter; +class Slice; +class Status; + +class BlobFileAddition { + public: + BlobFileAddition() = default; + + BlobFileAddition(uint64_t blob_file_number, uint64_t total_blob_count, + uint64_t total_blob_bytes, std::string checksum_method, + std::string checksum_value) + : blob_file_number_(blob_file_number), + total_blob_count_(total_blob_count), + total_blob_bytes_(total_blob_bytes), + checksum_method_(std::move(checksum_method)), + checksum_value_(std::move(checksum_value)) { + assert(checksum_method_.empty() == checksum_value_.empty()); + } + + uint64_t GetBlobFileNumber() const { return blob_file_number_; } + uint64_t GetTotalBlobCount() const { return total_blob_count_; } + uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; } + const std::string& GetChecksumMethod() const { return checksum_method_; } + const std::string& GetChecksumValue() const { return checksum_value_; } + + void EncodeTo(std::string* output) const; + Status DecodeFrom(Slice* input); + + std::string DebugString() const; + std::string DebugJSON() const; + + private: + enum CustomFieldTags : uint32_t; + + uint64_t blob_file_number_ = kInvalidBlobFileNumber; + uint64_t total_blob_count_ = 0; + uint64_t total_blob_bytes_ = 0; + std::string checksum_method_; + std::string checksum_value_; +}; + +bool operator==(const BlobFileAddition& lhs, const BlobFileAddition& rhs); +bool operator!=(const BlobFileAddition& lhs, const BlobFileAddition& rhs); + +std::ostream& operator<<(std::ostream& os, + const BlobFileAddition& blob_file_addition); +JSONWriter& operator<<(JSONWriter& jw, + const BlobFileAddition& blob_file_addition); + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ 
mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,210 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_file_addition.h" + +#include +#include +#include + +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +class BlobFileAdditionTest : public testing::Test { + public: + static void TestEncodeDecode(const BlobFileAddition& blob_file_addition) { + std::string encoded; + blob_file_addition.EncodeTo(&encoded); + + BlobFileAddition decoded; + Slice input(encoded); + ASSERT_OK(decoded.DecodeFrom(&input)); + + ASSERT_EQ(blob_file_addition, decoded); + } +}; + +TEST_F(BlobFileAdditionTest, Empty) { + BlobFileAddition blob_file_addition; + + ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), kInvalidBlobFileNumber); + ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 0); + ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(), 0); + ASSERT_TRUE(blob_file_addition.GetChecksumMethod().empty()); + ASSERT_TRUE(blob_file_addition.GetChecksumValue().empty()); + + TestEncodeDecode(blob_file_addition); +} + +TEST_F(BlobFileAdditionTest, NonEmpty) { + constexpr uint64_t blob_file_number = 123; + constexpr uint64_t total_blob_count = 2; + constexpr uint64_t total_blob_bytes = 123456; + const std::string checksum_method("SHA1"); + const std::string checksum_value( + "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" + "\x5c\xbd"); + + BlobFileAddition blob_file_addition(blob_file_number, total_blob_count, + total_blob_bytes, checksum_method, + checksum_value); + + ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number); + ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), total_blob_count); + ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(), total_blob_bytes); + ASSERT_EQ(blob_file_addition.GetChecksumMethod(), checksum_method); + ASSERT_EQ(blob_file_addition.GetChecksumValue(), checksum_value); + + TestEncodeDecode(blob_file_addition); +} + +TEST_F(BlobFileAdditionTest, DecodeErrors) { + std::string str; + Slice slice(str); + + BlobFileAddition blob_file_addition; + + { + const Status s = blob_file_addition.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "blob file number")); + } + + constexpr uint64_t blob_file_number = 123; + PutVarint64(&str, blob_file_number); + slice = str; + + { + const Status s = blob_file_addition.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "total blob count")); + } + + constexpr uint64_t total_blob_count = 4567; + PutVarint64(&str, total_blob_count); + slice = str; + + { + const Status s = blob_file_addition.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "total blob bytes")); + } + + constexpr uint64_t total_blob_bytes = 12345678; + PutVarint64(&str, total_blob_bytes); + slice = str; + + { + const Status s = blob_file_addition.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "checksum method")); + } + + constexpr char checksum_method[] = "SHA1"; + PutLengthPrefixedSlice(&str, checksum_method); + slice = str; + + { + const Status s = blob_file_addition.DecodeFrom(&slice); + 
ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "checksum value")); + } + + constexpr char checksum_value[] = + "\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52" + "\x5c\xbd"; + PutLengthPrefixedSlice(&str, checksum_value); + slice = str; + + { + const Status s = blob_file_addition.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "custom field tag")); + } + + constexpr uint32_t custom_tag = 2; + PutVarint32(&str, custom_tag); + slice = str; + + { + const Status s = blob_file_addition.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "custom field value")); + } +} + +TEST_F(BlobFileAdditionTest, ForwardCompatibleCustomField) { + SyncPoint::GetInstance()->SetCallBack( + "BlobFileAddition::EncodeTo::CustomFields", [&](void* arg) { + std::string* output = static_cast(arg); + + constexpr uint32_t forward_compatible_tag = 2; + PutVarint32(output, forward_compatible_tag); + + PutLengthPrefixedSlice(output, "deadbeef"); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr uint64_t blob_file_number = 678; + constexpr uint64_t total_blob_count = 9999; + constexpr uint64_t total_blob_bytes = 100000000; + const std::string checksum_method("CRC32"); + const std::string checksum_value("\x3d\x87\xff\x57"); + + BlobFileAddition blob_file_addition(blob_file_number, total_blob_count, + total_blob_bytes, checksum_method, + checksum_value); + + TestEncodeDecode(blob_file_addition); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(BlobFileAdditionTest, ForwardIncompatibleCustomField) { + SyncPoint::GetInstance()->SetCallBack( + "BlobFileAddition::EncodeTo::CustomFields", [&](void* arg) { + std::string* output = static_cast(arg); + + constexpr uint32_t forward_incompatible_tag = (1 << 6) + 1; + PutVarint32(output, forward_incompatible_tag); + + PutLengthPrefixedSlice(output, "foobar"); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr uint64_t blob_file_number = 456; + constexpr uint64_t total_blob_count = 100; + constexpr uint64_t total_blob_bytes = 2000000; + const std::string checksum_method("CRC32B"); + const std::string checksum_value("\x6d\xbd\xf2\x3a"); + + BlobFileAddition blob_file_addition(blob_file_number, total_blob_count, + total_blob_bytes, checksum_method, + checksum_value); + + std::string encoded; + blob_file_addition.EncodeTo(&encoded); + + BlobFileAddition decoded_blob_file_addition; + Slice input(encoded); + const Status s = decoded_blob_file_addition.DecodeFrom(&input); + + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "Forward incompatible")); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_builder.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_builder.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,375 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
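
The custom-field machinery in BlobFileAddition::EncodeTo()/DecodeFrom() above is a small forward-compatibility protocol: after the fixed fields comes a stream of (varint tag, length-prefixed value) pairs terminated by kEndMarker, and readers skip tags they do not recognize unless bit 6 (kForwardIncompatibleMask) is set, in which case they must fail with Corruption rather than silently drop the field; the two sync-point tests exercise exactly those two cases. A self-contained sketch of the scheme (the varint helpers are re-implemented here so the example compiles on its own; they mirror the spirit of util/coding.h rather than copying it):

    #include <cstdint>
    #include <iostream>
    #include <string>

    void PutVarint32(std::string* out, uint32_t v) {
      while (v >= 0x80) {
        out->push_back(static_cast<char>(v | 0x80));
        v >>= 7;
      }
      out->push_back(static_cast<char>(v));
    }

    bool GetVarint32(const std::string& in, size_t* pos, uint32_t* v) {
      *v = 0;
      for (int shift = 0; shift <= 28 && *pos < in.size(); shift += 7) {
        uint32_t byte = static_cast<unsigned char>(in[(*pos)++]);
        *v |= (byte & 0x7f) << shift;
        if (!(byte & 0x80)) return true;
      }
      return false;
    }

    // Same scheme as BlobFileAddition's custom fields: unknown tags are
    // skippable unless the "forward incompatible" bit forces a rejection.
    constexpr uint32_t kEndMarker = 0;
    constexpr uint32_t kForwardIncompatibleMask = 1 << 6;

    int main() {
      std::string buf;
      PutVarint32(&buf, 2);   // a forward *compatible* tag
      PutVarint32(&buf, 4);   // length prefix
      buf += "abcd";          // field value
      PutVarint32(&buf, kEndMarker);

      size_t pos = 0;
      uint32_t tag;
      while (GetVarint32(buf, &pos, &tag) && tag != kEndMarker) {
        if (tag & kForwardIncompatibleMask) {
          std::cout << "corruption: forward incompatible tag " << tag << "\n";
          return 1;
        }
        uint32_t len;
        if (!GetVarint32(buf, &pos, &len) || pos + len > buf.size()) return 1;
        std::cout << "skipping unknown tag " << tag << " (" << len
                  << " bytes)\n";
        pos += len;  // skip the value, stay compatible
      }
    }
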
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_file_builder.h"
+
+#include <cassert>
+
+#include "db/blob/blob_file_addition.h"
+#include "db/blob/blob_file_completion_callback.h"
+#include "db/blob/blob_index.h"
+#include "db/blob/blob_log_format.h"
+#include "db/blob/blob_log_writer.h"
+#include "db/event_helpers.h"
+#include "db/version_set.h"
+#include "file/filename.h"
+#include "file/read_write_util.h"
+#include "file/writable_file_writer.h"
+#include "logging/logging.h"
+#include "options/cf_options.h"
+#include "options/options_helper.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "test_util/sync_point.h"
+#include "trace_replay/io_tracer.h"
+#include "util/compression.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+BlobFileBuilder::BlobFileBuilder(
+    VersionSet* versions, FileSystem* fs,
+    const ImmutableOptions* immutable_options,
+    const MutableCFOptions* mutable_cf_options, const FileOptions* file_options,
+    int job_id, uint32_t column_family_id,
+    const std::string& column_family_name, Env::IOPriority io_priority,
+    Env::WriteLifeTimeHint write_hint,
+    const std::shared_ptr<IOTracer>& io_tracer,
+    BlobFileCompletionCallback* blob_callback,
+    BlobFileCreationReason creation_reason,
+    std::vector<std::string>* blob_file_paths,
+    std::vector<BlobFileAddition>* blob_file_additions)
+    : BlobFileBuilder([versions]() { return versions->NewFileNumber(); }, fs,
+                      immutable_options, mutable_cf_options, file_options,
+                      job_id, column_family_id, column_family_name, io_priority,
+                      write_hint, io_tracer, blob_callback, creation_reason,
+                      blob_file_paths, blob_file_additions) {}
+
+BlobFileBuilder::BlobFileBuilder(
+    std::function<uint64_t()> file_number_generator, FileSystem* fs,
+    const ImmutableOptions* immutable_options,
+    const MutableCFOptions* mutable_cf_options, const FileOptions* file_options,
+    int job_id, uint32_t column_family_id,
+    const std::string& column_family_name, Env::IOPriority io_priority,
+    Env::WriteLifeTimeHint write_hint,
+    const std::shared_ptr<IOTracer>& io_tracer,
+    BlobFileCompletionCallback* blob_callback,
+    BlobFileCreationReason creation_reason,
+    std::vector<std::string>* blob_file_paths,
+    std::vector<BlobFileAddition>* blob_file_additions)
+    : file_number_generator_(std::move(file_number_generator)),
+      fs_(fs),
+      immutable_options_(immutable_options),
+      min_blob_size_(mutable_cf_options->min_blob_size),
+      blob_file_size_(mutable_cf_options->blob_file_size),
+      blob_compression_type_(mutable_cf_options->blob_compression_type),
+      file_options_(file_options),
+      job_id_(job_id),
+      column_family_id_(column_family_id),
+      column_family_name_(column_family_name),
+      io_priority_(io_priority),
+      write_hint_(write_hint),
+      io_tracer_(io_tracer),
+      blob_callback_(blob_callback),
+      creation_reason_(creation_reason),
+      blob_file_paths_(blob_file_paths),
+      blob_file_additions_(blob_file_additions),
+      blob_count_(0),
+      blob_bytes_(0) {
+  assert(file_number_generator_);
+  assert(fs_);
+  assert(immutable_options_);
+  assert(file_options_);
+  assert(blob_file_paths_);
+  assert(blob_file_paths_->empty());
+  assert(blob_file_additions_);
+  assert(blob_file_additions_->empty());
+}
+
+BlobFileBuilder::~BlobFileBuilder() = default;
+
+Status BlobFileBuilder::Add(const Slice& key, const Slice& value,
+                            std::string* blob_index) {
+  assert(blob_index);
+  assert(blob_index->empty());
+
+  if (value.size() < min_blob_size_) {
+    return Status::OK();
+  }
+
+  {
+    const Status s = OpenBlobFileIfNeeded();
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  Slice blob = value;
+  std::string compressed_blob;
+
+  {
+    const Status s = CompressBlobIfNeeded(&blob, &compressed_blob);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  uint64_t blob_file_number = 0;
+  uint64_t blob_offset = 0;
+
+  {
+    const Status s =
+        WriteBlobToFile(key, blob, &blob_file_number, &blob_offset);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  {
+    const Status s = CloseBlobFileIfNeeded();
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  BlobIndex::EncodeBlob(blob_index, blob_file_number, blob_offset, blob.size(),
+                        blob_compression_type_);
+
+  return Status::OK();
+}
+
+Status BlobFileBuilder::Finish() {
+  if (!IsBlobFileOpen()) {
+    return Status::OK();
+  }
+
+  return CloseBlobFile();
+}
+
+bool BlobFileBuilder::IsBlobFileOpen() const { return !!writer_; }
+
+Status BlobFileBuilder::OpenBlobFileIfNeeded() {
+  if (IsBlobFileOpen()) {
+    return Status::OK();
+  }
+
+  assert(!blob_count_);
+  assert(!blob_bytes_);
+
+  assert(file_number_generator_);
+  const uint64_t blob_file_number = file_number_generator_();
+
+  assert(immutable_options_);
+  assert(!immutable_options_->cf_paths.empty());
+  std::string blob_file_path =
+      BlobFileName(immutable_options_->cf_paths.front().path, blob_file_number);
+
+  if (blob_callback_) {
+    blob_callback_->OnBlobFileCreationStarted(
+        blob_file_path, column_family_name_, job_id_, creation_reason_);
+  }
+
+  std::unique_ptr<FSWritableFile> file;
+
+  {
+    assert(file_options_);
+    Status s = NewWritableFile(fs_, blob_file_path, &file, *file_options_);
+
+    TEST_SYNC_POINT_CALLBACK(
+        "BlobFileBuilder::OpenBlobFileIfNeeded:NewWritableFile", &s);
+
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  // Note: files get added to blob_file_paths_ right after the open, so they
+  // can be cleaned up upon failure. Contrast this with blob_file_additions_,
+  // which only contains successfully written files.
+  assert(blob_file_paths_);
+  blob_file_paths_->emplace_back(std::move(blob_file_path));
+
+  assert(file);
+  file->SetIOPriority(io_priority_);
+  file->SetWriteLifeTimeHint(write_hint_);
+  FileTypeSet tmp_set = immutable_options_->checksum_handoff_file_types;
+  Statistics* const statistics = immutable_options_->stats;
+  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
+      std::move(file), blob_file_paths_->back(), *file_options_,
+      immutable_options_->clock, io_tracer_, statistics,
+      immutable_options_->listeners,
+      immutable_options_->file_checksum_gen_factory.get(),
+      tmp_set.Contains(FileType::kBlobFile), false));
+
+  constexpr bool do_flush = false;
+
+  std::unique_ptr<BlobLogWriter> blob_log_writer(new BlobLogWriter(
+      std::move(file_writer), immutable_options_->clock, statistics,
+      blob_file_number, immutable_options_->use_fsync, do_flush));
+
+  constexpr bool has_ttl = false;
+  constexpr ExpirationRange expiration_range;
+
+  BlobLogHeader header(column_family_id_, blob_compression_type_, has_ttl,
+                       expiration_range);
+
+  {
+    Status s = blob_log_writer->WriteHeader(header);
+
+    TEST_SYNC_POINT_CALLBACK(
+        "BlobFileBuilder::OpenBlobFileIfNeeded:WriteHeader", &s);
+
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  writer_ = std::move(blob_log_writer);
+
+  assert(IsBlobFileOpen());
+
+  return Status::OK();
+}
+
+Status BlobFileBuilder::CompressBlobIfNeeded(
+    Slice* blob, std::string* compressed_blob) const {
+  assert(blob);
+  assert(compressed_blob);
+  assert(compressed_blob->empty());
+  assert(immutable_options_);
+
+  if (blob_compression_type_ == kNoCompression) {
+    return Status::OK();
+  }
+
+  CompressionOptions opts;
+  CompressionContext context(blob_compression_type_);
+  constexpr uint64_t sample_for_compression = 0;
+
+  CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(),
+                       blob_compression_type_, sample_for_compression);
+
+  constexpr uint32_t compression_format_version = 2;
+
+  bool success = false;
+
+  {
+    StopWatch stop_watch(immutable_options_->clock, immutable_options_->stats,
+                         BLOB_DB_COMPRESSION_MICROS);
+    success =
+        CompressData(*blob, info, compression_format_version, compressed_blob);
+  }
+
+  if (!success) {
+    return Status::Corruption("Error compressing blob");
+  }
+
+  *blob = Slice(*compressed_blob);
+
+  return Status::OK();
+}
+
+Status BlobFileBuilder::WriteBlobToFile(const Slice& key, const Slice& blob,
+                                        uint64_t* blob_file_number,
+                                        uint64_t* blob_offset) {
+  assert(IsBlobFileOpen());
+  assert(blob_file_number);
+  assert(blob_offset);
+
+  uint64_t key_offset = 0;
+
+  Status s = writer_->AddRecord(key, blob, &key_offset, blob_offset);
+
+  TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AddRecord", &s);
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  *blob_file_number = writer_->get_log_number();
+
+  ++blob_count_;
+  blob_bytes_ += BlobLogRecord::kHeaderSize + key.size() + blob.size();
+
+  return Status::OK();
+}
+
+Status BlobFileBuilder::CloseBlobFile() {
+  assert(IsBlobFileOpen());
+
+  BlobLogFooter footer;
+  footer.blob_count = blob_count_;
+
+  std::string checksum_method;
+  std::string checksum_value;
+
+  Status s = writer_->AppendFooter(footer, &checksum_method, &checksum_value);
+
+  TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AppendFooter", &s);
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  const uint64_t blob_file_number = writer_->get_log_number();
+
+  if (blob_callback_) {
+    s = blob_callback_->OnBlobFileCompleted(
+        blob_file_paths_->back(), column_family_name_, job_id_,
+        blob_file_number, creation_reason_, s, checksum_value, checksum_method,
+        blob_count_, blob_bytes_);
+  }
+
+  assert(blob_file_additions_);
+  blob_file_additions_->emplace_back(blob_file_number, blob_count_, blob_bytes_,
+                                     std::move(checksum_method),
+                                     std::move(checksum_value));
+
+  assert(immutable_options_);
+  ROCKS_LOG_INFO(immutable_options_->logger,
+                 "[%s] [JOB %d] Generated blob file #%" PRIu64 ": %" PRIu64
+                 " total blobs, %" PRIu64 " total bytes",
+                 column_family_name_.c_str(), job_id_, blob_file_number,
+                 blob_count_, blob_bytes_);
+
+  writer_.reset();
+  blob_count_ = 0;
+  blob_bytes_ = 0;
+
+  return s;
+}
+
+Status BlobFileBuilder::CloseBlobFileIfNeeded() {
+  assert(IsBlobFileOpen());
+
+  const WritableFileWriter* const file_writer = writer_->file();
+  assert(file_writer);
+
+  if (file_writer->GetFileSize() < blob_file_size_) {
+    return Status::OK();
+  }
+
+  return CloseBlobFile();
+}
+
+void BlobFileBuilder::Abandon(const Status& s) {
+  if (!IsBlobFileOpen()) {
+    return;
+  }
+  if (blob_callback_) {
+    // BlobFileBuilder::Abandon() is called because of error while writing to
+    // Blob files. So we can ignore the below error.
+    blob_callback_
+        ->OnBlobFileCompleted(blob_file_paths_->back(), column_family_name_,
+                              job_id_, writer_->get_log_number(),
+                              creation_reason_, s, "", "", blob_count_,
+                              blob_bytes_)
+        .PermitUncheckedError();
+  }
+
+  writer_.reset();
+  blob_count_ = 0;
+  blob_bytes_ = 0;
+}
+} // namespace ROCKSDB_NAMESPACE
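For orientation before the header: the Add() path above externalizes a value only when it reaches min_blob_size; smaller values come back with an empty blob_index and stay inline. A minimal caller sketch under that contract (the helper name and batch loop are illustrative only, not part of this patch):

#include <string>
#include <utility>
#include <vector>
#include "db/blob/blob_file_builder.h"

// Hypothetical driver; BlobFileBuilder construction is assumed elsewhere.
Status WriteAllToBlobFiles(ROCKSDB_NAMESPACE::BlobFileBuilder& builder,
                           const std::vector<std::pair<ROCKSDB_NAMESPACE::Slice,
                                                       ROCKSDB_NAMESPACE::Slice>>& kvs) {
  for (const auto& kv : kvs) {
    std::string blob_index;
    const ROCKSDB_NAMESPACE::Status s = builder.Add(kv.first, kv.second, &blob_index);
    if (!s.ok()) {
      builder.Abandon(s);  // drop the partially written blob file
      return s;
    }
    // blob_index is empty for inlined values; otherwise it encodes
    // (blob file number, offset, size, compression) via BlobIndex::EncodeBlob.
  }
  return builder.Finish();  // appends the footer of any still-open file
}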
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_builder.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_builder.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,103 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rocksdb/compression_type.h"
+#include "rocksdb/env.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/types.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class VersionSet;
+class FileSystem;
+class SystemClock;
+struct ImmutableOptions;
+struct MutableCFOptions;
+struct FileOptions;
+class BlobFileAddition;
+class Status;
+class Slice;
+class BlobLogWriter;
+class IOTracer;
+class BlobFileCompletionCallback;
+
+class BlobFileBuilder {
+ public:
+  BlobFileBuilder(VersionSet* versions, FileSystem* fs,
+                  const ImmutableOptions* immutable_options,
+                  const MutableCFOptions* mutable_cf_options,
+                  const FileOptions* file_options, int job_id,
+                  uint32_t column_family_id,
+                  const std::string& column_family_name,
+                  Env::IOPriority io_priority,
+                  Env::WriteLifeTimeHint write_hint,
+                  const std::shared_ptr<IOTracer>& io_tracer,
+                  BlobFileCompletionCallback* blob_callback,
+                  BlobFileCreationReason creation_reason,
+                  std::vector<std::string>* blob_file_paths,
+                  std::vector<BlobFileAddition>* blob_file_additions);
+
+  BlobFileBuilder(std::function<uint64_t()> file_number_generator,
+                  FileSystem* fs, const ImmutableOptions* immutable_options,
+                  const MutableCFOptions* mutable_cf_options,
+                  const FileOptions* file_options, int job_id,
+                  uint32_t column_family_id,
+                  const std::string& column_family_name,
+                  Env::IOPriority io_priority,
+                  Env::WriteLifeTimeHint write_hint,
+                  const std::shared_ptr<IOTracer>& io_tracer,
+                  BlobFileCompletionCallback* blob_callback,
+                  BlobFileCreationReason creation_reason,
+                  std::vector<std::string>* blob_file_paths,
+                  std::vector<BlobFileAddition>* blob_file_additions);
+
+  BlobFileBuilder(const BlobFileBuilder&) = delete;
+  BlobFileBuilder& operator=(const BlobFileBuilder&) = delete;
+
+  ~BlobFileBuilder();
+
+  Status Add(const Slice& key, const Slice& value, std::string* blob_index);
+  Status Finish();
+  void Abandon(const Status& s);
+
+ private:
+  bool IsBlobFileOpen() const;
+  Status OpenBlobFileIfNeeded();
+  Status CompressBlobIfNeeded(Slice* blob, std::string* compressed_blob) const;
+  Status WriteBlobToFile(const Slice& key, const Slice& blob,
+                         uint64_t* blob_file_number, uint64_t* blob_offset);
+  Status CloseBlobFile();
+  Status CloseBlobFileIfNeeded();
+
+  std::function<uint64_t()> file_number_generator_;
+  FileSystem* fs_;
+  const ImmutableOptions* immutable_options_;
+  uint64_t min_blob_size_;
+  uint64_t blob_file_size_;
+  CompressionType blob_compression_type_;
+  const FileOptions* file_options_;
+  int job_id_;
+  uint32_t column_family_id_;
+  std::string column_family_name_;
+  Env::IOPriority io_priority_;
+  Env::WriteLifeTimeHint write_hint_;
+  std::shared_ptr<IOTracer> io_tracer_;
+  BlobFileCompletionCallback* blob_callback_;
+  BlobFileCreationReason creation_reason_;
+  std::vector<std::string>* blob_file_paths_;
+  std::vector<BlobFileAddition>* blob_file_additions_;
+  std::unique_ptr<BlobLogWriter> writer_;
+  uint64_t blob_count_;
+  uint64_t blob_bytes_;
+};
+
+} // namespace ROCKSDB_NAMESPACE
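Worth noting about the interface above: the first constructor only wraps VersionSet::NewFileNumber in a std::function<uint64_t()> and delegates to the second, so file numbering is injectable. A sketch of a custom generator (the lambda is illustrative; the test file below does the same thing with a functor):

uint64_t next_file_number = 1;
auto file_number_generator = [&next_file_number]() {
  return ++next_file_number;  // first blob file would get number 2
};
// The generator is passed where a VersionSet* would otherwise go:
// BlobFileBuilder builder(file_number_generator, fs, /* remaining args as in the header */);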
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,672 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_file_builder.h"
+
+#include <cassert>
+#include <cinttypes>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "db/blob/blob_file_addition.h"
+#include "db/blob/blob_index.h"
+#include "db/blob/blob_log_format.h"
+#include "db/blob/blob_log_sequential_reader.h"
+#include "env/mock_env.h"
+#include "file/filename.h"
+#include "file/random_access_file_reader.h"
+#include "options/cf_options.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_checksum.h"
+#include "rocksdb/options.h"
+#include "test_util/sync_point.h"
+#include "test_util/testharness.h"
+#include "util/compression.h"
+#include "utilities/fault_injection_env.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class TestFileNumberGenerator {
+ public:
+  uint64_t operator()() { return ++next_file_number_; }
+
+ private:
+  uint64_t next_file_number_ = 1;
+};
+
+class BlobFileBuilderTest : public testing::Test {
+ protected:
+  BlobFileBuilderTest() {
+    mock_env_.reset(MockEnv::Create(Env::Default()));
+    fs_ = mock_env_->GetFileSystem().get();
+    clock_ = mock_env_->GetSystemClock().get();
+  }
+
+  void VerifyBlobFile(uint64_t blob_file_number,
+                      const std::string& blob_file_path,
+                      uint32_t column_family_id,
+                      CompressionType blob_compression_type,
+                      const std::vector<std::pair<std::string, std::string>>&
+                          expected_key_value_pairs,
+                      const std::vector<std::string>& blob_indexes) {
+    assert(expected_key_value_pairs.size() == blob_indexes.size());
+
+    std::unique_ptr<FSRandomAccessFile> file;
+    constexpr IODebugContext* dbg = nullptr;
+    ASSERT_OK(
+        fs_->NewRandomAccessFile(blob_file_path, file_options_, &file, dbg));
+
+    std::unique_ptr<RandomAccessFileReader> file_reader(
+        new RandomAccessFileReader(std::move(file), blob_file_path, clock_));
+
+    constexpr Statistics* statistics = nullptr;
+    BlobLogSequentialReader blob_log_reader(std::move(file_reader), clock_,
+                                            statistics);
+
+    BlobLogHeader header;
+    ASSERT_OK(blob_log_reader.ReadHeader(&header));
+    ASSERT_EQ(header.version, kVersion1);
+    ASSERT_EQ(header.column_family_id, column_family_id);
+    ASSERT_EQ(header.compression, blob_compression_type);
+    ASSERT_FALSE(header.has_ttl);
+    ASSERT_EQ(header.expiration_range, ExpirationRange());
+
+    for (size_t i = 0; i < expected_key_value_pairs.size(); ++i) {
+      BlobLogRecord record;
+      uint64_t blob_offset = 0;
+
+      ASSERT_OK(blob_log_reader.ReadRecord(
+          &record, BlobLogSequentialReader::kReadHeaderKeyBlob, &blob_offset));
+
+      // Check the contents of the blob file
+      const auto& expected_key_value = expected_key_value_pairs[i];
+      const auto& key = expected_key_value.first;
+      const auto& value = expected_key_value.second;
+
+      ASSERT_EQ(record.key_size, key.size());
+      ASSERT_EQ(record.value_size, value.size());
+      ASSERT_EQ(record.expiration, 0);
+      ASSERT_EQ(record.key, key);
+      ASSERT_EQ(record.value, value);
+
+      // Make sure the blob reference returned by the builder points to the
+      // right place
+      BlobIndex blob_index;
+      ASSERT_OK(blob_index.DecodeFrom(blob_indexes[i]));
+      ASSERT_FALSE(blob_index.IsInlined());
+      ASSERT_FALSE(blob_index.HasTTL());
+      ASSERT_EQ(blob_index.file_number(), blob_file_number);
+      ASSERT_EQ(blob_index.offset(), blob_offset);
+      ASSERT_EQ(blob_index.size(), value.size());
+    }
+
+    BlobLogFooter footer;
+    ASSERT_OK(blob_log_reader.ReadFooter(&footer));
+    ASSERT_EQ(footer.blob_count, expected_key_value_pairs.size());
+    ASSERT_EQ(footer.expiration_range, ExpirationRange());
+  }
+
+  std::unique_ptr<Env> mock_env_;
+  FileSystem* fs_;
+  SystemClock* clock_;
+  FileOptions file_options_;
+};
+
+TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) {
+  // Build a single blob file
+  constexpr size_t number_of_blobs = 10;
+  constexpr size_t key_size = 1;
+  constexpr size_t value_size = 4;
+  constexpr size_t value_offset = 1234;
+
+  Options options;
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileBuilderTest_BuildAndCheckOneFile"),
+      0);
+  options.enable_blob_files = true;
+  options.env = mock_env_.get();
+
+  ImmutableOptions immutable_options(options);
+  MutableCFOptions mutable_cf_options(options);
+
+  constexpr int job_id = 1;
+  constexpr uint32_t column_family_id = 123;
+  constexpr char column_family_name[] = "foobar";
+  constexpr Env::IOPriority io_priority = Env::IO_HIGH;
+  constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
+
+  std::vector<std::string> blob_file_paths;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  BlobFileBuilder builder(
+      TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
+      &file_options_, job_id, column_family_id, column_family_name, io_priority,
+      write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
+      BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
+
+  std::vector<std::pair<std::string, std::string>> expected_key_value_pairs(
+      number_of_blobs);
+  std::vector<std::string> blob_indexes(number_of_blobs);
+
+  for (size_t i = 0; i < number_of_blobs; ++i) {
+    auto& expected_key_value = expected_key_value_pairs[i];
+
+    auto& key = expected_key_value.first;
+    key = std::to_string(i);
+    assert(key.size() == key_size);
+
+    auto& value = expected_key_value.second;
+    value = std::to_string(i + value_offset);
+    assert(value.size() == value_size);
+
+    auto& blob_index = blob_indexes[i];
+
+    ASSERT_OK(builder.Add(key, value, &blob_index));
+    ASSERT_FALSE(blob_index.empty());
+  }
+
+  ASSERT_OK(builder.Finish());
+
+  // Check the metadata generated
+  constexpr uint64_t blob_file_number = 2;
+
+  ASSERT_EQ(blob_file_paths.size(), 1);
+
+  const std::string& blob_file_path = blob_file_paths[0];
+
+  ASSERT_EQ(
+      blob_file_path,
+      BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
+
+  ASSERT_EQ(blob_file_additions.size(), 1);
+
+  const auto& blob_file_addition = blob_file_additions[0];
+
+  ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
+  ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), number_of_blobs);
+  ASSERT_EQ(
+      blob_file_addition.GetTotalBlobBytes(),
+      number_of_blobs * (BlobLogRecord::kHeaderSize + key_size + value_size));
+
+  // Verify the contents of the new blob file as well as the blob references
+  VerifyBlobFile(blob_file_number, blob_file_path, column_family_id,
+                 kNoCompression, expected_key_value_pairs, blob_indexes);
+}
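A detail that explains the magic constant above and in the tests that follow: TestFileNumberGenerator pre-increments, starting from 1, so the first number it hands out is 2, and BuildAndCheckMultipleFiles accordingly expects file i to be numbered i + 2. Roughly:

TestFileNumberGenerator generator;
assert(generator() == 2);  // ++1: matches "constexpr uint64_t blob_file_number = 2"
assert(generator() == 3);  // subsequent files, hence the "i + 2" expectations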
+
+TEST_F(BlobFileBuilderTest, BuildAndCheckMultipleFiles) {
+  // Build multiple blob files: file size limit is set to the size of a single
+  // value, so each blob ends up in a file of its own
+  constexpr size_t number_of_blobs = 10;
+  constexpr size_t key_size = 1;
+  constexpr size_t value_size = 10;
+  constexpr size_t value_offset = 1234567890;
+
+  Options options;
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileBuilderTest_BuildAndCheckMultipleFiles"),
+      0);
+  options.enable_blob_files = true;
+  options.blob_file_size = value_size;
+  options.env = mock_env_.get();
+
+  ImmutableOptions immutable_options(options);
+  MutableCFOptions mutable_cf_options(options);
+
+  constexpr int job_id = 1;
+  constexpr uint32_t column_family_id = 123;
+  constexpr char column_family_name[] = "foobar";
+  constexpr Env::IOPriority io_priority = Env::IO_HIGH;
+  constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
+
+  std::vector<std::string> blob_file_paths;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  BlobFileBuilder builder(
+      TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
+      &file_options_, job_id, column_family_id, column_family_name, io_priority,
+      write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
+      BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
+
+  std::vector<std::pair<std::string, std::string>> expected_key_value_pairs(
+      number_of_blobs);
+  std::vector<std::string> blob_indexes(number_of_blobs);
+
+  for (size_t i = 0; i < number_of_blobs; ++i) {
+    auto& expected_key_value = expected_key_value_pairs[i];
+
+    auto& key = expected_key_value.first;
+    key = std::to_string(i);
+    assert(key.size() == key_size);
+
+    auto& value = expected_key_value.second;
+    value = std::to_string(i + value_offset);
+    assert(value.size() == value_size);
+
+    auto& blob_index = blob_indexes[i];
+
+    ASSERT_OK(builder.Add(key, value, &blob_index));
+    ASSERT_FALSE(blob_index.empty());
+  }
+
+  ASSERT_OK(builder.Finish());
+
+  // Check the metadata generated
+  ASSERT_EQ(blob_file_paths.size(), number_of_blobs);
+  ASSERT_EQ(blob_file_additions.size(), number_of_blobs);
+
+  for (size_t i = 0; i < number_of_blobs; ++i) {
+    const uint64_t blob_file_number = i + 2;
+
+    ASSERT_EQ(blob_file_paths[i],
+              BlobFileName(immutable_options.cf_paths.front().path,
+                           blob_file_number));
+
+    const auto& blob_file_addition = blob_file_additions[i];
+
+    ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
+    ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 1);
+    ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(),
+              BlobLogRecord::kHeaderSize + key_size + value_size);
+  }
+
+  // Verify the contents of the new blob files as well as the blob references
+  for (size_t i = 0; i < number_of_blobs; ++i) {
+    std::vector<std::pair<std::string, std::string>> expected_key_value_pair{
+        expected_key_value_pairs[i]};
+    std::vector<std::string> blob_index{blob_indexes[i]};
+
+    VerifyBlobFile(i + 2, blob_file_paths[i], column_family_id, kNoCompression,
+                   expected_key_value_pair, blob_index);
+  }
+}
+
+TEST_F(BlobFileBuilderTest, InlinedValues) {
+  // All values are below the min_blob_size threshold; no blob files get written
+  constexpr size_t number_of_blobs = 10;
+  constexpr size_t key_size = 1;
+  constexpr size_t value_size = 10;
+  constexpr size_t value_offset = 1234567890;
+
+  Options options;
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileBuilderTest_InlinedValues"),
+      0);
+  options.enable_blob_files = true;
+  options.min_blob_size = 1024;
+  options.env = mock_env_.get();
+
+  ImmutableOptions immutable_options(options);
+  MutableCFOptions mutable_cf_options(options);
+
+  constexpr int job_id = 1;
+  constexpr uint32_t column_family_id = 123;
+  constexpr char column_family_name[] = "foobar";
+  constexpr Env::IOPriority io_priority = Env::IO_HIGH;
+  constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
+
+  std::vector<std::string> blob_file_paths;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  BlobFileBuilder builder(
+      TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
+      &file_options_, job_id, column_family_id, column_family_name, io_priority,
+      write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
+      BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
+
+  for (size_t i = 0; i < number_of_blobs; ++i) {
+    const std::string key = std::to_string(i);
+    assert(key.size() == key_size);
+
+    const std::string value = std::to_string(i + value_offset);
+    assert(value.size() == value_size);
+
+    std::string blob_index;
+    ASSERT_OK(builder.Add(key, value, &blob_index));
+    ASSERT_TRUE(blob_index.empty());
+  }
+
+  ASSERT_OK(builder.Finish());
+
+  // Check the metadata generated
+  ASSERT_TRUE(blob_file_paths.empty());
+  ASSERT_TRUE(blob_file_additions.empty());
+}
+
+TEST_F(BlobFileBuilderTest, Compression) {
+  // Build a blob file with a compressed blob
+  if (!Snappy_Supported()) {
+    return;
+  }
+
+  constexpr size_t key_size = 1;
+  constexpr size_t value_size = 100;
+
+  Options options;
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(), "BlobFileBuilderTest_Compression"),
+      0);
+  options.enable_blob_files = true;
+  options.blob_compression_type = kSnappyCompression;
+  options.env = mock_env_.get();
+
+  ImmutableOptions immutable_options(options);
+  MutableCFOptions mutable_cf_options(options);
+
+  constexpr int job_id = 1;
+  constexpr uint32_t column_family_id = 123;
+  constexpr char column_family_name[] = "foobar";
+  constexpr Env::IOPriority io_priority = Env::IO_HIGH;
+  constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
+
+  std::vector<std::string> blob_file_paths;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  BlobFileBuilder builder(
+      TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
+      &file_options_, job_id, column_family_id, column_family_name, io_priority,
+      write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
+      BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
+
+  const std::string key("1");
+  const std::string uncompressed_value(value_size, 'x');
+
+  std::string blob_index;
+
+  ASSERT_OK(builder.Add(key, uncompressed_value, &blob_index));
+  ASSERT_FALSE(blob_index.empty());
+
+  ASSERT_OK(builder.Finish());
+
+  // Check the metadata generated
+  constexpr uint64_t blob_file_number = 2;
+
+  ASSERT_EQ(blob_file_paths.size(), 1);
+
+  const std::string& blob_file_path = blob_file_paths[0];
+
+  ASSERT_EQ(
+      blob_file_path,
+      BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
+
+  ASSERT_EQ(blob_file_additions.size(), 1);
+
+  const auto& blob_file_addition = blob_file_additions[0];
+
+  ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
+  ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 1);
+
+  CompressionOptions opts;
+  CompressionContext context(kSnappyCompression);
+  constexpr uint64_t sample_for_compression = 0;
+
+  CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(),
+                       kSnappyCompression, sample_for_compression);
+
+  std::string compressed_value;
+  ASSERT_TRUE(Snappy_Compress(info, uncompressed_value.data(),
+                              uncompressed_value.size(), &compressed_value));
+
+  ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(),
+            BlobLogRecord::kHeaderSize + key_size + compressed_value.size());
+
+  // Verify the contents of the new blob file as well as the blob reference
+  std::vector<std::pair<std::string, std::string>> expected_key_value_pairs{
+      {key, compressed_value}};
+  std::vector<std::string> blob_indexes{blob_index};
+
+  VerifyBlobFile(blob_file_number, blob_file_path, column_family_id,
+                 kSnappyCompression, expected_key_value_pairs, blob_indexes);
+}
+
+TEST_F(BlobFileBuilderTest, CompressionError) {
+  // Simulate an error during compression
+  if (!Snappy_Supported()) {
+    return;
+  }
+
+  Options options;
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileBuilderTest_CompressionError"),
+      0);
+  options.enable_blob_files = true;
+  options.blob_compression_type = kSnappyCompression;
+  options.env = mock_env_.get();
+  ImmutableOptions immutable_options(options);
+  MutableCFOptions mutable_cf_options(options);
+
+  constexpr int job_id = 1;
+  constexpr uint32_t column_family_id = 123;
+  constexpr char column_family_name[] = "foobar";
+  constexpr Env::IOPriority io_priority = Env::IO_HIGH;
+  constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
+
+  std::vector<std::string> blob_file_paths;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  BlobFileBuilder builder(
+      TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
+      &file_options_, job_id, column_family_id, column_family_name, io_priority,
+      write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
+      BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
+
+  SyncPoint::GetInstance()->SetCallBack("CompressData:TamperWithReturnValue",
+                                        [](void* arg) {
+                                          bool* ret = static_cast<bool*>(arg);
+                                          *ret = false;
+                                        });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  constexpr char key[] = "1";
+  constexpr char value[] = "deadbeef";
+
+  std::string blob_index;
+
+  ASSERT_TRUE(builder.Add(key, value, &blob_index).IsCorruption());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  constexpr uint64_t blob_file_number = 2;
+
+  ASSERT_EQ(blob_file_paths.size(), 1);
+  ASSERT_EQ(
+      blob_file_paths[0],
+      BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
+
+  ASSERT_TRUE(blob_file_additions.empty());
+}
+
+TEST_F(BlobFileBuilderTest, Checksum) {
+  // Build a blob file with checksum
+
+  class DummyFileChecksumGenerator : public FileChecksumGenerator {
+   public:
+    void Update(const char* /* data */, size_t /* n */) override {}
+
+    void Finalize() override {}
+
+    std::string GetChecksum() const override { return std::string("dummy"); }
+
+    const char* Name() const override { return "DummyFileChecksum"; }
+  };
+
+  class DummyFileChecksumGenFactory : public FileChecksumGenFactory {
+   public:
+    std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
+        const FileChecksumGenContext& /* context */) override {
+      return std::unique_ptr<FileChecksumGenerator>(
+          new DummyFileChecksumGenerator);
+    }
+
+    const char* Name() const override { return "DummyFileChecksumGenFactory"; }
+  };
+
+  Options options;
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(), "BlobFileBuilderTest_Checksum"),
+      0);
+  options.enable_blob_files = true;
+  options.file_checksum_gen_factory =
+      std::make_shared<DummyFileChecksumGenFactory>();
+  options.env = mock_env_.get();
+
+  ImmutableOptions immutable_options(options);
+  MutableCFOptions mutable_cf_options(options);
+
+  constexpr int job_id = 1;
+  constexpr uint32_t column_family_id = 123;
+  constexpr char column_family_name[] = "foobar";
+  constexpr Env::IOPriority io_priority = Env::IO_HIGH;
+  constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
+
+  std::vector<std::string> blob_file_paths;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  BlobFileBuilder builder(
+      TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
+      &file_options_, job_id, column_family_id, column_family_name, io_priority,
+      write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
+      BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
+
+  const std::string key("1");
+  const std::string value("deadbeef");
+
+  std::string blob_index;
+
+  ASSERT_OK(builder.Add(key, value, &blob_index));
+  ASSERT_FALSE(blob_index.empty());
+
+  ASSERT_OK(builder.Finish());
+
+  // Check the metadata generated
+  constexpr uint64_t blob_file_number = 2;
+
+  ASSERT_EQ(blob_file_paths.size(), 1);
+
+  const std::string& blob_file_path = blob_file_paths[0];
+
+  ASSERT_EQ(
+      blob_file_path,
+      BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
+
+  ASSERT_EQ(blob_file_additions.size(), 1);
+
+  const auto& blob_file_addition = blob_file_additions[0];
+
+  ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
+  ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 1);
+  ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(),
+            BlobLogRecord::kHeaderSize + key.size() + value.size());
+  ASSERT_EQ(blob_file_addition.GetChecksumMethod(), "DummyFileChecksum");
+  ASSERT_EQ(blob_file_addition.GetChecksumValue(), "dummy");
+
+  // Verify the contents of the new blob file as well as the blob reference
+  std::vector<std::pair<std::string, std::string>> expected_key_value_pairs{
+      {key, value}};
+  std::vector<std::string> blob_indexes{blob_index};
+
+  VerifyBlobFile(blob_file_number, blob_file_path, column_family_id,
+                 kNoCompression, expected_key_value_pairs, blob_indexes);
+}
+
+class BlobFileBuilderIOErrorTest
+    : public testing::Test,
+      public testing::WithParamInterface<std::string> {
+ protected:
+  BlobFileBuilderIOErrorTest() : sync_point_(GetParam()) {
+    mock_env_.reset(MockEnv::Create(Env::Default()));
+    fs_ = mock_env_->GetFileSystem().get();
+  }
+
+  std::unique_ptr<Env> mock_env_;
+  FileSystem* fs_;
+  FileOptions file_options_;
+  std::string sync_point_;
+};
+
+INSTANTIATE_TEST_CASE_P(
+    BlobFileBuilderTest, BlobFileBuilderIOErrorTest,
+    ::testing::ValuesIn(std::vector<std::string>{
+        "BlobFileBuilder::OpenBlobFileIfNeeded:NewWritableFile",
+        "BlobFileBuilder::OpenBlobFileIfNeeded:WriteHeader",
+        "BlobFileBuilder::WriteBlobToFile:AddRecord",
+        "BlobFileBuilder::WriteBlobToFile:AppendFooter"}));
+
+TEST_P(BlobFileBuilderIOErrorTest, IOError) {
+  // Simulate an I/O error during the specified step of Add()
+  // Note: blob_file_size will be set to value_size in order for the first blob
+  // to trigger close
+  constexpr size_t value_size = 8;
+
+  Options options;
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileBuilderIOErrorTest_IOError"),
+      0);
+  options.enable_blob_files = true;
+  options.blob_file_size = value_size;
+  options.env = mock_env_.get();
+
+  ImmutableOptions immutable_options(options);
+  MutableCFOptions mutable_cf_options(options);
+
+  constexpr int job_id = 1;
+  constexpr uint32_t column_family_id = 123;
+  constexpr char column_family_name[] = "foobar";
+  constexpr Env::IOPriority io_priority = Env::IO_HIGH;
+  constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
+
+  std::vector<std::string> blob_file_paths;
+  std::vector<BlobFileAddition> blob_file_additions;
+
+  BlobFileBuilder builder(
+      TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
+      &file_options_, job_id, column_family_id, column_family_name, io_priority,
+      write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
+      BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
+
+  SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) {
+    Status* const s = static_cast<Status*>(arg);
+    assert(s);
+
+    (*s) = Status::IOError(sync_point_);
+  });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  constexpr char key[] = "1";
+  constexpr char value[] = "deadbeef";
+
+  std::string blob_index;
+
+  ASSERT_TRUE(builder.Add(key, value, &blob_index).IsIOError());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  if (sync_point_ == "BlobFileBuilder::OpenBlobFileIfNeeded:NewWritableFile") {
+    ASSERT_TRUE(blob_file_paths.empty());
+  } else {
+    constexpr uint64_t blob_file_number = 2;
+
+    ASSERT_EQ(blob_file_paths.size(), 1);
+    ASSERT_EQ(blob_file_paths[0],
+              BlobFileName(immutable_options.cf_paths.front().path,
+                           blob_file_number));
+  }
+
+  ASSERT_TRUE(blob_file_additions.empty());
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_cache.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_cache.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,102 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_file_cache.h"
+
+#include <cassert>
+#include <memory>
+
+#include "db/blob/blob_file_reader.h"
+#include "options/cf_options.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/slice.h"
+#include "test_util/sync_point.h"
+#include "trace_replay/io_tracer.h"
+#include "util/hash.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+BlobFileCache::BlobFileCache(Cache* cache,
+                             const ImmutableOptions* immutable_options,
+                             const FileOptions* file_options,
+                             uint32_t column_family_id,
+                             HistogramImpl* blob_file_read_hist,
+                             const std::shared_ptr<IOTracer>& io_tracer)
+    : cache_(cache),
+      mutex_(kNumberOfMutexStripes, kGetSliceNPHash64UnseededFnPtr),
+      immutable_options_(immutable_options),
+      file_options_(file_options),
+      column_family_id_(column_family_id),
+      blob_file_read_hist_(blob_file_read_hist),
+      io_tracer_(io_tracer) {
+  assert(cache_);
+  assert(immutable_options_);
+  assert(file_options_);
+}
+
+Status BlobFileCache::GetBlobFileReader(
+    uint64_t blob_file_number,
+    CacheHandleGuard<BlobFileReader>* blob_file_reader) {
+  assert(blob_file_reader);
+  assert(blob_file_reader->IsEmpty());
+
+  const Slice key = GetSlice(&blob_file_number);
+
+  assert(cache_);
+
+  Cache::Handle* handle = cache_->Lookup(key);
+  if (handle) {
+    *blob_file_reader = CacheHandleGuard<BlobFileReader>(cache_, handle);
+    return Status::OK();
+  }
+
+  TEST_SYNC_POINT("BlobFileCache::GetBlobFileReader:DoubleCheck");
+
+  // Check again while holding mutex
+  MutexLock lock(mutex_.get(key));
+
+  handle = cache_->Lookup(key);
+  if (handle) {
+    *blob_file_reader = CacheHandleGuard<BlobFileReader>(cache_, handle);
+    return Status::OK();
+  }
+
+  assert(immutable_options_);
+  Statistics* const statistics = immutable_options_->stats;
+
+  RecordTick(statistics, NO_FILE_OPENS);
+
+  std::unique_ptr<BlobFileReader> reader;
+
+  {
+    assert(file_options_);
+    const Status s = BlobFileReader::Create(
+        *immutable_options_, *file_options_, column_family_id_,
+        blob_file_read_hist_, blob_file_number, io_tracer_, &reader);
+    if (!s.ok()) {
+      RecordTick(statistics, NO_FILE_ERRORS);
+      return s;
+    }
+  }
+
+  {
+    constexpr size_t charge = 1;
+
+    const Status s = cache_->Insert(key, reader.get(), charge,
+                                    &DeleteCacheEntry<BlobFileReader>, &handle);
+    if (!s.ok()) {
+      RecordTick(statistics, NO_FILE_ERRORS);
+      return s;
+    }
+  }
+
+  reader.release();
+
+  *blob_file_reader = CacheHandleGuard<BlobFileReader>(cache_, handle);
+
+  return Status::OK();
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_cache.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_cache.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,52 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cinttypes>
+
+#include "cache/cache_helpers.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Cache;
+struct ImmutableOptions;
+struct FileOptions;
+class HistogramImpl;
+class Status;
+class BlobFileReader;
+class Slice;
+class IOTracer;
+
+class BlobFileCache {
+ public:
+  BlobFileCache(Cache* cache, const ImmutableOptions* immutable_options,
+                const FileOptions* file_options, uint32_t column_family_id,
+                HistogramImpl* blob_file_read_hist,
+                const std::shared_ptr<IOTracer>& io_tracer);
+
+  BlobFileCache(const BlobFileCache&) = delete;
+  BlobFileCache& operator=(const BlobFileCache&) = delete;
+
+  Status GetBlobFileReader(uint64_t blob_file_number,
+                           CacheHandleGuard<BlobFileReader>* blob_file_reader);
+
+ private:
+  Cache* cache_;
+  // Note: mutex_ below is used to guard against multiple threads racing to open
+  // the same file.
+  Striped<port::Mutex, Slice> mutex_;
+  const ImmutableOptions* immutable_options_;
+  const FileOptions* file_options_;
+  uint32_t column_family_id_;
+  HistogramImpl* blob_file_read_hist_;
+  std::shared_ptr<IOTracer> io_tracer_;
+
+  static constexpr size_t kNumberOfMutexStripes = 1 << 7;
+};
+
+} // namespace ROCKSDB_NAMESPACE
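One consequence of the constant charge of 1 that GetBlobFileReader() passes to Cache::Insert: the backing cache's capacity directly caps how many blob file readers can be cached at once. The CacheFull test further down relies on this by creating a cache that can hold nothing at all; roughly:

// NewLRUCache(capacity, num_shard_bits, strict_capacity_limit)
std::shared_ptr<Cache> backing_cache =
    NewLRUCache(/*capacity=*/0, /*num_shard_bits=*/-1,
                /*strict_capacity_limit=*/true);
// With zero capacity and a strict limit, the insert fails and
// GetBlobFileReader surfaces Status::Incomplete after a NO_FILE_ERRORS tick.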
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,268 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_file_cache.h"
+
+#include <cassert>
+#include <string>
+
+#include "db/blob/blob_log_format.h"
+#include "db/blob/blob_log_writer.h"
+#include "env/mock_env.h"
+#include "file/filename.h"
+#include "file/read_write_util.h"
+#include "file/writable_file_writer.h"
+#include "options/cf_options.h"
+#include "rocksdb/cache.h"
+#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/options.h"
+#include "rocksdb/statistics.h"
+#include "test_util/sync_point.h"
+#include "test_util/testharness.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+// Creates a test blob file with a single blob in it.
+void WriteBlobFile(uint32_t column_family_id,
+                   const ImmutableOptions& immutable_options,
+                   uint64_t blob_file_number) {
+  assert(!immutable_options.cf_paths.empty());
+
+  const std::string blob_file_path =
+      BlobFileName(immutable_options.cf_paths.front().path, blob_file_number);
+
+  std::unique_ptr<FSWritableFile> file;
+  ASSERT_OK(NewWritableFile(immutable_options.fs.get(), blob_file_path, &file,
+                            FileOptions()));
+
+  std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
+      std::move(file), blob_file_path, FileOptions(), immutable_options.clock));
+
+  constexpr Statistics* statistics = nullptr;
+  constexpr bool use_fsync = false;
+  constexpr bool do_flush = false;
+
+  BlobLogWriter blob_log_writer(std::move(file_writer), immutable_options.clock,
+                                statistics, blob_file_number, use_fsync,
+                                do_flush);
+
+  constexpr bool has_ttl = false;
+  constexpr ExpirationRange expiration_range;
+
+  BlobLogHeader header(column_family_id, kNoCompression, has_ttl,
+                       expiration_range);
+
+  ASSERT_OK(blob_log_writer.WriteHeader(header));
+
+  constexpr char key[] = "key";
+  constexpr char blob[] = "blob";
+
+  std::string compressed_blob;
+
+  uint64_t key_offset = 0;
+  uint64_t blob_offset = 0;
+
+  ASSERT_OK(blob_log_writer.AddRecord(key, blob, &key_offset, &blob_offset));
+
+  BlobLogFooter footer;
+  footer.blob_count = 1;
+  footer.expiration_range = expiration_range;
+
+  std::string checksum_method;
+  std::string checksum_value;
+
+  ASSERT_OK(
+      blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value));
+}
+
+} // anonymous namespace
+
+class BlobFileCacheTest : public testing::Test {
+ protected:
+  BlobFileCacheTest() { mock_env_.reset(MockEnv::Create(Env::Default())); }
+
+  std::unique_ptr<Env> mock_env_;
+};
+
+TEST_F(BlobFileCacheTest, GetBlobFileReader) {
+  Options options;
+  options.env = mock_env_.get();
+  options.statistics = CreateDBStatistics();
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileCacheTest_GetBlobFileReader"),
+      0);
+  options.enable_blob_files = true;
+
+  constexpr uint32_t column_family_id = 1;
+  ImmutableOptions immutable_options(options);
+  constexpr uint64_t blob_file_number = 123;
+
+  WriteBlobFile(column_family_id, immutable_options, blob_file_number);
+
+  constexpr size_t capacity = 10;
+  std::shared_ptr<Cache> backing_cache = NewLRUCache(capacity);
+
+  FileOptions file_options;
+  constexpr HistogramImpl* blob_file_read_hist = nullptr;
+
+  BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
+                                &file_options, column_family_id,
+                                blob_file_read_hist, nullptr /*IOTracer*/);
+
+  // First try: reader should be opened and put in cache
+  CacheHandleGuard<BlobFileReader> first;
+
+  ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &first));
+  ASSERT_NE(first.GetValue(), nullptr);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
+
+  // Second try: reader should be served from cache
+  CacheHandleGuard<BlobFileReader> second;
+
+  ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &second));
+  ASSERT_NE(second.GetValue(), nullptr);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
+
+  ASSERT_EQ(first.GetValue(), second.GetValue());
+}
+
+TEST_F(BlobFileCacheTest, GetBlobFileReader_Race) {
+  Options options;
+  options.env = mock_env_.get();
+  options.statistics = CreateDBStatistics();
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileCacheTest_GetBlobFileReader_Race"),
+      0);
+  options.enable_blob_files = true;
+
+  constexpr uint32_t column_family_id = 1;
+  ImmutableOptions immutable_options(options);
+  constexpr uint64_t blob_file_number = 123;
+
+  WriteBlobFile(column_family_id, immutable_options, blob_file_number);
+
+  constexpr size_t capacity = 10;
+  std::shared_ptr<Cache> backing_cache = NewLRUCache(capacity);
+
+  FileOptions file_options;
+  constexpr HistogramImpl* blob_file_read_hist = nullptr;
+
+  BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
+                                &file_options, column_family_id,
+                                blob_file_read_hist, nullptr /*IOTracer*/);
+
+  CacheHandleGuard<BlobFileReader> first;
+  CacheHandleGuard<BlobFileReader> second;
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "BlobFileCache::GetBlobFileReader:DoubleCheck", [&](void* /* arg */) {
+        // Disabling sync points to prevent infinite recursion
+        SyncPoint::GetInstance()->DisableProcessing();
+
+        ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &second));
+        ASSERT_NE(second.GetValue(), nullptr);
+        ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
+        ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &first));
+  ASSERT_NE(first.GetValue(), nullptr);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
+
+  ASSERT_EQ(first.GetValue(), second.GetValue());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
+TEST_F(BlobFileCacheTest, GetBlobFileReader_IOError) {
+  Options options;
+  options.env = mock_env_.get();
+  options.statistics = CreateDBStatistics();
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileCacheTest_GetBlobFileReader_IOError"),
+      0);
+  options.enable_blob_files = true;
+
+  constexpr size_t capacity = 10;
+  std::shared_ptr<Cache> backing_cache = NewLRUCache(capacity);
+
+  ImmutableOptions immutable_options(options);
+  FileOptions file_options;
+  constexpr uint32_t column_family_id = 1;
+  constexpr HistogramImpl* blob_file_read_hist = nullptr;
+
+  BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
+                                &file_options, column_family_id,
+                                blob_file_read_hist, nullptr /*IOTracer*/);
+
+  // Note: there is no blob file with the below number
+  constexpr uint64_t blob_file_number = 123;
+
+  CacheHandleGuard<BlobFileReader> reader;
+
+  ASSERT_TRUE(
+      blob_file_cache.GetBlobFileReader(blob_file_number, &reader).IsIOError());
+  ASSERT_EQ(reader.GetValue(), nullptr);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 1);
+}
+
+TEST_F(BlobFileCacheTest, GetBlobFileReader_CacheFull) {
+  Options options;
+  options.env = mock_env_.get();
+  options.statistics = CreateDBStatistics();
+  options.cf_paths.emplace_back(
+      test::PerThreadDBPath(mock_env_.get(),
+                            "BlobFileCacheTest_GetBlobFileReader_CacheFull"),
+      0);
+  options.enable_blob_files = true;
+
+  constexpr uint32_t column_family_id = 1;
+  ImmutableOptions immutable_options(options);
+  constexpr uint64_t blob_file_number = 123;
+
+  WriteBlobFile(column_family_id, immutable_options, blob_file_number);
+
+  constexpr size_t capacity = 0;
+  constexpr int num_shard_bits = -1;  // determined automatically
+  constexpr bool strict_capacity_limit = true;
+  std::shared_ptr<Cache> backing_cache =
+      NewLRUCache(capacity, num_shard_bits, strict_capacity_limit);
+
+  FileOptions file_options;
+  constexpr HistogramImpl* blob_file_read_hist = nullptr;
+
+  BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
+                                &file_options, column_family_id,
+                                blob_file_read_hist, nullptr /*IOTracer*/);
+
+  // Insert into cache should fail since it has zero capacity and
+  // strict_capacity_limit is set
+  CacheHandleGuard<BlobFileReader> reader;
+
+  ASSERT_TRUE(blob_file_cache.GetBlobFileReader(blob_file_number, &reader)
+                  .IsIncomplete());
+  ASSERT_EQ(reader.GetValue(), nullptr);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
+  ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 1);
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
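The GetBlobFileReader_Race test above shows the standard recipe for making an interleaving deterministic with sync points: hook the named point, re-enter the code under test from the callback, and disable processing inside the callback so the hook does not recurse into itself. In outline:

SyncPoint::GetInstance()->SetCallBack(
    "BlobFileCache::GetBlobFileReader:DoubleCheck", [&](void* /* arg */) {
      SyncPoint::GetInstance()->DisableProcessing();  // no recursive hooks
      // ... perform the "second thread's" call here ...
    });
SyncPoint::GetInstance()->EnableProcessing();
// ... run the code path containing the TEST_SYNC_POINT ...
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();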
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,101 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include "db/error_handler.h"
+#include "db/event_helpers.h"
+#include "file/sst_file_manager_impl.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class BlobFileCompletionCallback {
+ public:
+  BlobFileCompletionCallback(
+      SstFileManager* sst_file_manager, InstrumentedMutex* mutex,
+      ErrorHandler* error_handler, EventLogger* event_logger,
+      const std::vector<std::shared_ptr<EventListener>>& listeners,
+      const std::string& dbname)
+      : event_logger_(event_logger), listeners_(listeners), dbname_(dbname) {
+#ifndef ROCKSDB_LITE
+    sst_file_manager_ = sst_file_manager;
+    mutex_ = mutex;
+    error_handler_ = error_handler;
+#else
+    (void)sst_file_manager;
+    (void)mutex;
+    (void)error_handler;
+#endif  // ROCKSDB_LITE
+  }
+
+  void OnBlobFileCreationStarted(const std::string& file_name,
+                                 const std::string& column_family_name,
+                                 int job_id,
+                                 BlobFileCreationReason creation_reason) {
+#ifndef ROCKSDB_LITE
+    // Notify the listeners.
+    EventHelpers::NotifyBlobFileCreationStarted(listeners_, dbname_,
+                                                column_family_name, file_name,
+                                                job_id, creation_reason);
+#else
+    (void)file_name;
+    (void)column_family_name;
+    (void)job_id;
+    (void)creation_reason;
+#endif
+  }
+
+  Status OnBlobFileCompleted(const std::string& file_name,
+                             const std::string& column_family_name, int job_id,
+                             uint64_t file_number,
+                             BlobFileCreationReason creation_reason,
+                             const Status& report_status,
+                             const std::string& checksum_value,
+                             const std::string& checksum_method,
+                             uint64_t blob_count, uint64_t blob_bytes) {
+    Status s;
+
+#ifndef ROCKSDB_LITE
+    auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager_);
+    if (sfm) {
+      // Report new blob files to SstFileManagerImpl
+      s = sfm->OnAddFile(file_name);
+      if (sfm->IsMaxAllowedSpaceReached()) {
+        s = Status::SpaceLimit("Max allowed space was reached");
+        TEST_SYNC_POINT(
+            "BlobFileCompletionCallback::CallBack::MaxAllowedSpaceReached");
+        InstrumentedMutexLock l(mutex_);
+        error_handler_->SetBGError(s, BackgroundErrorReason::kFlush);
+      }
+    }
+#endif  // !ROCKSDB_LITE
+
+    // Notify the listeners.
+    EventHelpers::LogAndNotifyBlobFileCreationFinished(
+        event_logger_, listeners_, dbname_, column_family_name, file_name,
+        job_id, file_number, creation_reason,
+        (!report_status.ok() ? report_status : s),
+        (checksum_value.empty() ? kUnknownFileChecksum : checksum_value),
+        (checksum_method.empty() ? kUnknownFileChecksumFuncName
+                                 : checksum_method),
+        blob_count, blob_bytes);
+    return s;
+  }
+
+ private:
+#ifndef ROCKSDB_LITE
+  SstFileManager* sst_file_manager_;
+  InstrumentedMutex* mutex_;
+  ErrorHandler* error_handler_;
+#endif  // ROCKSDB_LITE
+  EventLogger* event_logger_;
+  std::vector<std::shared_ptr<EventListener>> listeners_;
+  std::string dbname_;
+};
+} // namespace ROCKSDB_NAMESPACE
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_garbage.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_garbage.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_garbage.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_garbage.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,134 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_file_garbage.h"
+
+#include <ostream>
+#include <sstream>
+
+#include "logging/event_logger.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+#include "test_util/sync_point.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// Tags for custom fields. Note that these get persisted in the manifest,
+// so existing tags should not be modified.
+enum BlobFileGarbage::CustomFieldTags : uint32_t {
+  kEndMarker,
+
+  // Add forward compatible fields here
+
+  /////////////////////////////////////////////////////////////////////
+
+  kForwardIncompatibleMask = 1 << 6,
+
+  // Add forward incompatible fields here
+};
+
+void BlobFileGarbage::EncodeTo(std::string* output) const {
+  PutVarint64(output, blob_file_number_);
+  PutVarint64(output, garbage_blob_count_);
+  PutVarint64(output, garbage_blob_bytes_);
+
+  // Encode any custom fields here. The format to use is a Varint32 tag (see
+  // CustomFieldTags above) followed by a length prefixed slice. Unknown custom
+  // fields will be ignored during decoding unless they're in the forward
+  // incompatible range.
+
+  TEST_SYNC_POINT_CALLBACK("BlobFileGarbage::EncodeTo::CustomFields", output);
+
+  PutVarint32(output, kEndMarker);
+}
+
+Status BlobFileGarbage::DecodeFrom(Slice* input) {
+  constexpr char class_name[] = "BlobFileGarbage";
+
+  if (!GetVarint64(input, &blob_file_number_)) {
+    return Status::Corruption(class_name, "Error decoding blob file number");
+  }
+
+  if (!GetVarint64(input, &garbage_blob_count_)) {
+    return Status::Corruption(class_name, "Error decoding garbage blob count");
+  }
+
+  if (!GetVarint64(input, &garbage_blob_bytes_)) {
+    return Status::Corruption(class_name, "Error decoding garbage blob bytes");
+  }
+
+  while (true) {
+    uint32_t custom_field_tag = 0;
+    if (!GetVarint32(input, &custom_field_tag)) {
+      return Status::Corruption(class_name, "Error decoding custom field tag");
+    }
+
+    if (custom_field_tag == kEndMarker) {
+      break;
+    }
+
+    if (custom_field_tag & kForwardIncompatibleMask) {
+      return Status::Corruption(
+          class_name, "Forward incompatible custom field encountered");
+    }
+
+    Slice custom_field_value;
+    if (!GetLengthPrefixedSlice(input, &custom_field_value)) {
+      return Status::Corruption(class_name,
+                                "Error decoding custom field value");
+    }
+  }
+
+  return Status::OK();
+}
+
+std::string BlobFileGarbage::DebugString() const {
+  std::ostringstream oss;
+
+  oss << *this;
+
+  return oss.str();
+}
+
+std::string BlobFileGarbage::DebugJSON() const {
+  JSONWriter jw;
+
+  jw << *this;
+
+  jw.EndObject();
+
+  return jw.Get();
+}
+
+bool operator==(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs) {
+  return lhs.GetBlobFileNumber() == rhs.GetBlobFileNumber() &&
+         lhs.GetGarbageBlobCount() == rhs.GetGarbageBlobCount() &&
+         lhs.GetGarbageBlobBytes() == rhs.GetGarbageBlobBytes();
+}
+
+bool operator!=(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs) {
+  return !(lhs == rhs);
+}
+
+std::ostream& operator<<(std::ostream& os,
+                         const BlobFileGarbage& blob_file_garbage) {
+  os << "blob_file_number: " << blob_file_garbage.GetBlobFileNumber()
+     << " garbage_blob_count: " << blob_file_garbage.GetGarbageBlobCount()
+     << " garbage_blob_bytes: " << blob_file_garbage.GetGarbageBlobBytes();
+
+  return os;
+}
+
+JSONWriter& operator<<(JSONWriter& jw,
+                       const BlobFileGarbage& blob_file_garbage) {
+  jw << "BlobFileNumber" << blob_file_garbage.GetBlobFileNumber()
+     << "GarbageBlobCount" << blob_file_garbage.GetGarbageBlobCount()
+     << "GarbageBlobBytes" << blob_file_garbage.GetGarbageBlobBytes();
+
+  return jw;
+}
+
+} // namespace ROCKSDB_NAMESPACE
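To make the custom-field framing in EncodeTo()/DecodeFrom() concrete: after the three fixed varints, each custom field is a varint32 tag followed by a length-prefixed slice, and the stream ends with kEndMarker (0). A forward compatible field is any tag below kForwardIncompatibleMask (1 << 6); the ForwardCompatibleCustomField test further below injects exactly this shape:

std::string output;
PutVarint32(&output, /*tag=*/2);              // < (1 << 6): old readers skip it
PutLengthPrefixedSlice(&output, "deadbeef");  // the tag's payload
PutVarint32(&output, 0);                      // kEndMarker terminates the list
// DecodeFrom() ignores the unknown tag 2, but any tag with bit 6 set fails
// with Status::Corruption("Forward incompatible custom field encountered").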
+ +#pragma once + +#include +#include +#include + +#include "db/blob/blob_constants.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +class JSONWriter; +class Slice; +class Status; + +class BlobFileGarbage { + public: + BlobFileGarbage() = default; + + BlobFileGarbage(uint64_t blob_file_number, uint64_t garbage_blob_count, + uint64_t garbage_blob_bytes) + : blob_file_number_(blob_file_number), + garbage_blob_count_(garbage_blob_count), + garbage_blob_bytes_(garbage_blob_bytes) {} + + uint64_t GetBlobFileNumber() const { return blob_file_number_; } + uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; } + uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; } + + void EncodeTo(std::string* output) const; + Status DecodeFrom(Slice* input); + + std::string DebugString() const; + std::string DebugJSON() const; + + private: + enum CustomFieldTags : uint32_t; + + uint64_t blob_file_number_ = kInvalidBlobFileNumber; + uint64_t garbage_blob_count_ = 0; + uint64_t garbage_blob_bytes_ = 0; +}; + +bool operator==(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs); +bool operator!=(const BlobFileGarbage& lhs, const BlobFileGarbage& rhs); + +std::ostream& operator<<(std::ostream& os, + const BlobFileGarbage& blob_file_garbage); +JSONWriter& operator<<(JSONWriter& jw, + const BlobFileGarbage& blob_file_garbage); + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_garbage_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,173 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "db/blob/blob_file_garbage.h" + +#include +#include +#include + +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +class BlobFileGarbageTest : public testing::Test { + public: + static void TestEncodeDecode(const BlobFileGarbage& blob_file_garbage) { + std::string encoded; + blob_file_garbage.EncodeTo(&encoded); + + BlobFileGarbage decoded; + Slice input(encoded); + ASSERT_OK(decoded.DecodeFrom(&input)); + + ASSERT_EQ(blob_file_garbage, decoded); + } +}; + +TEST_F(BlobFileGarbageTest, Empty) { + BlobFileGarbage blob_file_garbage; + + ASSERT_EQ(blob_file_garbage.GetBlobFileNumber(), kInvalidBlobFileNumber); + ASSERT_EQ(blob_file_garbage.GetGarbageBlobCount(), 0); + ASSERT_EQ(blob_file_garbage.GetGarbageBlobBytes(), 0); + + TestEncodeDecode(blob_file_garbage); +} + +TEST_F(BlobFileGarbageTest, NonEmpty) { + constexpr uint64_t blob_file_number = 123; + constexpr uint64_t garbage_blob_count = 1; + constexpr uint64_t garbage_blob_bytes = 9876; + + BlobFileGarbage blob_file_garbage(blob_file_number, garbage_blob_count, + garbage_blob_bytes); + + ASSERT_EQ(blob_file_garbage.GetBlobFileNumber(), blob_file_number); + ASSERT_EQ(blob_file_garbage.GetGarbageBlobCount(), garbage_blob_count); + ASSERT_EQ(blob_file_garbage.GetGarbageBlobBytes(), garbage_blob_bytes); + + TestEncodeDecode(blob_file_garbage); +} + +TEST_F(BlobFileGarbageTest, DecodeErrors) { + std::string str; + Slice slice(str); + + BlobFileGarbage blob_file_garbage; + + { + const Status s = blob_file_garbage.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "blob file number")); + } + + constexpr uint64_t blob_file_number = 123; + PutVarint64(&str, blob_file_number); + slice = str; + + { + const Status s = blob_file_garbage.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "garbage blob count")); + } + + constexpr uint64_t garbage_blob_count = 4567; + PutVarint64(&str, garbage_blob_count); + slice = str; + + { + const Status s = blob_file_garbage.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "garbage blob bytes")); + } + + constexpr uint64_t garbage_blob_bytes = 12345678; + PutVarint64(&str, garbage_blob_bytes); + slice = str; + + { + const Status s = blob_file_garbage.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "custom field tag")); + } + + constexpr uint32_t custom_tag = 2; + PutVarint32(&str, custom_tag); + slice = str; + + { + const Status s = blob_file_garbage.DecodeFrom(&slice); + ASSERT_TRUE(s.IsCorruption()); + ASSERT_TRUE(std::strstr(s.getState(), "custom field value")); + } +} + +TEST_F(BlobFileGarbageTest, ForwardCompatibleCustomField) { + SyncPoint::GetInstance()->SetCallBack( + "BlobFileGarbage::EncodeTo::CustomFields", [&](void* arg) { + std::string* output = static_cast(arg); + + constexpr uint32_t forward_compatible_tag = 2; + PutVarint32(output, forward_compatible_tag); + + PutLengthPrefixedSlice(output, "deadbeef"); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr uint64_t blob_file_number = 678; + constexpr uint64_t garbage_blob_count = 9999; + constexpr uint64_t garbage_blob_bytes = 100000000; + + BlobFileGarbage blob_file_garbage(blob_file_number, garbage_blob_count, + garbage_blob_bytes); + + TestEncodeDecode(blob_file_garbage); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); 
+}
+
+TEST_F(BlobFileGarbageTest, ForwardIncompatibleCustomField) {
+  SyncPoint::GetInstance()->SetCallBack(
+      "BlobFileGarbage::EncodeTo::CustomFields", [&](void* arg) {
+        std::string* output = static_cast<std::string*>(arg);
+
+        constexpr uint32_t forward_incompatible_tag = (1 << 6) + 1;
+        PutVarint32(output, forward_incompatible_tag);
+
+        PutLengthPrefixedSlice(output, "foobar");
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  constexpr uint64_t blob_file_number = 456;
+  constexpr uint64_t garbage_blob_count = 100;
+  constexpr uint64_t garbage_blob_bytes = 2000000;
+
+  BlobFileGarbage blob_file_garbage(blob_file_number, garbage_blob_count,
+                                    garbage_blob_bytes);
+
+  std::string encoded;
+  blob_file_garbage.EncodeTo(&encoded);
+
+  BlobFileGarbage decoded_blob_file_addition;
+  Slice input(encoded);
+  const Status s = decoded_blob_file_addition.DecodeFrom(&input);
+
+  ASSERT_TRUE(s.IsCorruption());
+  ASSERT_TRUE(std::strstr(s.getState(), "Forward incompatible"));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_meta.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_meta.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_meta.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_meta.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,62 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
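To make the encoding exercised by the NonEmpty test above concrete: assuming the LEB128-style varints implemented by PutVarint64 in util/coding.h, and assuming the end-marker tag encodes as zero (its enum value is not visible in this diff), BlobFileGarbage(123, 1, 9876) serializes to exactly five bytes. A self-contained sketch:

#include <cassert>
#include <cstdint>
#include <string>

// LEB128 varint encoding, as used by PutVarint64 in util/coding.h.
std::string EncodeVarint64(uint64_t v) {
  std::string out;
  while (v >= 0x80) {
    out.push_back(static_cast<char>(v | 0x80));
    v >>= 7;
  }
  out.push_back(static_cast<char>(v));
  return out;
}

int main() {
  std::string encoded = EncodeVarint64(123)     // blob_file_number
                        + EncodeVarint64(1)     // garbage_blob_count
                        + EncodeVarint64(9876)  // garbage_blob_bytes
                        + EncodeVarint64(0);    // end marker (assumed tag value)
  assert(encoded == std::string("\x7b\x01\x94\x4d\x00", 5));
  return 0;
}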
+ +#include "db/blob/blob_file_meta.h" + +#include +#include + +#include "db/blob/blob_log_format.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { +uint64_t SharedBlobFileMetaData::GetBlobFileSize() const { + return BlobLogHeader::kSize + total_blob_bytes_ + BlobLogFooter::kSize; +} + +std::string SharedBlobFileMetaData::DebugString() const { + std::ostringstream oss; + oss << (*this); + + return oss.str(); +} + +std::ostream& operator<<(std::ostream& os, + const SharedBlobFileMetaData& shared_meta) { + os << "blob_file_number: " << shared_meta.GetBlobFileNumber() + << " total_blob_count: " << shared_meta.GetTotalBlobCount() + << " total_blob_bytes: " << shared_meta.GetTotalBlobBytes() + << " checksum_method: " << shared_meta.GetChecksumMethod() + << " checksum_value: " + << Slice(shared_meta.GetChecksumValue()).ToString(/* hex */ true); + + return os; +} + +std::string BlobFileMetaData::DebugString() const { + std::ostringstream oss; + oss << (*this); + + return oss.str(); +} + +std::ostream& operator<<(std::ostream& os, const BlobFileMetaData& meta) { + const auto& shared_meta = meta.GetSharedMeta(); + assert(shared_meta); + os << (*shared_meta); + + os << " linked_ssts: {"; + for (uint64_t file_number : meta.GetLinkedSsts()) { + os << ' ' << file_number; + } + os << " }"; + + os << " garbage_blob_count: " << meta.GetGarbageBlobCount() + << " garbage_blob_bytes: " << meta.GetGarbageBlobBytes(); + + return os; +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_meta.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_meta.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_meta.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_meta.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,170 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// SharedBlobFileMetaData represents the immutable part of blob files' metadata, +// like the blob file number, total number and size of blobs, or checksum +// method and value. There is supposed to be one object of this class per blob +// file (shared across all versions that include the blob file in question); +// hence, the type is neither copyable nor movable. A blob file can be marked +// obsolete when the corresponding SharedBlobFileMetaData object is destroyed. 
+
+class SharedBlobFileMetaData {
+ public:
+  static std::shared_ptr<SharedBlobFileMetaData> Create(
+      uint64_t blob_file_number, uint64_t total_blob_count,
+      uint64_t total_blob_bytes, std::string checksum_method,
+      std::string checksum_value) {
+    return std::shared_ptr<SharedBlobFileMetaData>(new SharedBlobFileMetaData(
+        blob_file_number, total_blob_count, total_blob_bytes,
+        std::move(checksum_method), std::move(checksum_value)));
+  }
+
+  template <typename Deleter>
+  static std::shared_ptr<SharedBlobFileMetaData> Create(
+      uint64_t blob_file_number, uint64_t total_blob_count,
+      uint64_t total_blob_bytes, std::string checksum_method,
+      std::string checksum_value, Deleter deleter) {
+    return std::shared_ptr<SharedBlobFileMetaData>(
+        new SharedBlobFileMetaData(blob_file_number, total_blob_count,
+                                   total_blob_bytes, std::move(checksum_method),
+                                   std::move(checksum_value)),
+        deleter);
+  }
+
+  SharedBlobFileMetaData(const SharedBlobFileMetaData&) = delete;
+  SharedBlobFileMetaData& operator=(const SharedBlobFileMetaData&) = delete;
+
+  SharedBlobFileMetaData(SharedBlobFileMetaData&&) = delete;
+  SharedBlobFileMetaData& operator=(SharedBlobFileMetaData&&) = delete;
+
+  uint64_t GetBlobFileSize() const;
+  uint64_t GetBlobFileNumber() const { return blob_file_number_; }
+  uint64_t GetTotalBlobCount() const { return total_blob_count_; }
+  uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; }
+  const std::string& GetChecksumMethod() const { return checksum_method_; }
+  const std::string& GetChecksumValue() const { return checksum_value_; }
+
+  std::string DebugString() const;
+
+ private:
+  SharedBlobFileMetaData(uint64_t blob_file_number, uint64_t total_blob_count,
+                         uint64_t total_blob_bytes, std::string checksum_method,
+                         std::string checksum_value)
+      : blob_file_number_(blob_file_number),
+        total_blob_count_(total_blob_count),
+        total_blob_bytes_(total_blob_bytes),
+        checksum_method_(std::move(checksum_method)),
+        checksum_value_(std::move(checksum_value)) {
+    assert(checksum_method_.empty() == checksum_value_.empty());
+  }
+
+  uint64_t blob_file_number_;
+  uint64_t total_blob_count_;
+  uint64_t total_blob_bytes_;
+  std::string checksum_method_;
+  std::string checksum_value_;
+};
+
+std::ostream& operator<<(std::ostream& os,
+                         const SharedBlobFileMetaData& shared_meta);
+
+// BlobFileMetaData contains the part of the metadata for blob files that can
+// vary across versions, like the amount of garbage in the blob file. In
+// addition, BlobFileMetaData objects point to and share the ownership of the
+// SharedBlobFileMetaData object for the corresponding blob file. Similarly to
+// SharedBlobFileMetaData, BlobFileMetaData are not copyable or movable. They
+// are meant to be jointly owned by the versions in which the blob file has the
+// same (immutable *and* mutable) state.
+
+class BlobFileMetaData {
+ public:
+  using LinkedSsts = std::unordered_set<uint64_t>;
+
+  static std::shared_ptr<BlobFileMetaData> Create(
+      std::shared_ptr<SharedBlobFileMetaData> shared_meta,
+      LinkedSsts linked_ssts, uint64_t garbage_blob_count,
+      uint64_t garbage_blob_bytes) {
+    return std::shared_ptr<BlobFileMetaData>(
+        new BlobFileMetaData(std::move(shared_meta), std::move(linked_ssts),
+                             garbage_blob_count, garbage_blob_bytes));
+  }
+
+  BlobFileMetaData(const BlobFileMetaData&) = delete;
+  BlobFileMetaData& operator=(const BlobFileMetaData&) = delete;
+
+  BlobFileMetaData(BlobFileMetaData&&) = delete;
+  BlobFileMetaData& operator=(BlobFileMetaData&&) = delete;
+
+  const std::shared_ptr<SharedBlobFileMetaData>& GetSharedMeta() const {
+    return shared_meta_;
+  }
+
+  uint64_t GetBlobFileSize() const {
+    assert(shared_meta_);
+    return shared_meta_->GetBlobFileSize();
+  }
+
+  uint64_t GetBlobFileNumber() const {
+    assert(shared_meta_);
+    return shared_meta_->GetBlobFileNumber();
+  }
+  uint64_t GetTotalBlobCount() const {
+    assert(shared_meta_);
+    return shared_meta_->GetTotalBlobCount();
+  }
+  uint64_t GetTotalBlobBytes() const {
+    assert(shared_meta_);
+    return shared_meta_->GetTotalBlobBytes();
+  }
+  const std::string& GetChecksumMethod() const {
+    assert(shared_meta_);
+    return shared_meta_->GetChecksumMethod();
+  }
+  const std::string& GetChecksumValue() const {
+    assert(shared_meta_);
+    return shared_meta_->GetChecksumValue();
+  }
+
+  const LinkedSsts& GetLinkedSsts() const { return linked_ssts_; }
+
+  uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; }
+  uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; }
+
+  std::string DebugString() const;
+
+ private:
+  BlobFileMetaData(std::shared_ptr<SharedBlobFileMetaData> shared_meta,
+                   LinkedSsts linked_ssts, uint64_t garbage_blob_count,
+                   uint64_t garbage_blob_bytes)
+      : shared_meta_(std::move(shared_meta)),
+        linked_ssts_(std::move(linked_ssts)),
+        garbage_blob_count_(garbage_blob_count),
+        garbage_blob_bytes_(garbage_blob_bytes) {
+    assert(shared_meta_);
+    assert(garbage_blob_count_ <= shared_meta_->GetTotalBlobCount());
+    assert(garbage_blob_bytes_ <= shared_meta_->GetTotalBlobBytes());
+  }
+
+  std::shared_ptr<SharedBlobFileMetaData> shared_meta_;
+  LinkedSsts linked_ssts_;
+  uint64_t garbage_blob_count_;
+  uint64_t garbage_blob_bytes_;
+};
+
+std::ostream& operator<<(std::ostream& os, const BlobFileMetaData& meta);
+
+} // namespace ROCKSDB_NAMESPACE
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_reader.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_reader.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,582 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
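The two factories declared in blob_file_meta.h above are meant to be used in pairs: one SharedBlobFileMetaData per physical blob file, and one BlobFileMetaData per version that layers the mutable state (linked SSTs, garbage counters) on top. A usage sketch; all values are hypothetical, only the API shapes come from the header:

// One shared, immutable object per physical blob file.
auto shared = SharedBlobFileMetaData::Create(
    /* blob_file_number */ 1, /* total_blob_count */ 100,
    /* total_blob_bytes */ 2000000, /* checksum_method */ "crc32c",
    /* checksum_value */ "\x01\x02\x03\x04");

// Version N: SSTs 10 and 11 link to the blob file; no garbage yet.
auto meta = BlobFileMetaData::Create(shared, /* linked_ssts */ {10, 11},
                                     /* garbage_blob_count */ 0,
                                     /* garbage_blob_bytes */ 0);

// A later version shares the same immutable object but accounts for garbage;
// the asserts in the private constructor keep both counters within the
// file's totals.
auto newer = BlobFileMetaData::Create(shared, {10, 11},
                                      /* garbage_blob_count */ 5,
                                      /* garbage_blob_bytes */ 100000);

Destroying the last shared_ptr to the shared object is the hook for marking the blob file obsolete, which is presumably why the second Create overload accepts a custom deleter.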
+ +#include "db/blob/blob_file_reader.h" + +#include +#include + +#include "db/blob/blob_log_format.h" +#include "file/file_prefetch_buffer.h" +#include "file/filename.h" +#include "monitoring/statistics.h" +#include "options/cf_options.h" +#include "rocksdb/file_system.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "test_util/sync_point.h" +#include "util/compression.h" +#include "util/crc32c.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +Status BlobFileReader::Create( + const ImmutableOptions& immutable_options, const FileOptions& file_options, + uint32_t column_family_id, HistogramImpl* blob_file_read_hist, + uint64_t blob_file_number, const std::shared_ptr& io_tracer, + std::unique_ptr* blob_file_reader) { + assert(blob_file_reader); + assert(!*blob_file_reader); + + uint64_t file_size = 0; + std::unique_ptr file_reader; + + { + const Status s = + OpenFile(immutable_options, file_options, blob_file_read_hist, + blob_file_number, io_tracer, &file_size, &file_reader); + if (!s.ok()) { + return s; + } + } + + assert(file_reader); + + Statistics* const statistics = immutable_options.stats; + + CompressionType compression_type = kNoCompression; + + { + const Status s = ReadHeader(file_reader.get(), column_family_id, statistics, + &compression_type); + if (!s.ok()) { + return s; + } + } + + { + const Status s = ReadFooter(file_reader.get(), file_size, statistics); + if (!s.ok()) { + return s; + } + } + + blob_file_reader->reset( + new BlobFileReader(std::move(file_reader), file_size, compression_type, + immutable_options.clock, statistics)); + + return Status::OK(); +} + +Status BlobFileReader::OpenFile( + const ImmutableOptions& immutable_options, const FileOptions& file_opts, + HistogramImpl* blob_file_read_hist, uint64_t blob_file_number, + const std::shared_ptr& io_tracer, uint64_t* file_size, + std::unique_ptr* file_reader) { + assert(file_size); + assert(file_reader); + + const auto& cf_paths = immutable_options.cf_paths; + assert(!cf_paths.empty()); + + const std::string blob_file_path = + BlobFileName(cf_paths.front().path, blob_file_number); + + FileSystem* const fs = immutable_options.fs.get(); + assert(fs); + + constexpr IODebugContext* dbg = nullptr; + + { + TEST_SYNC_POINT("BlobFileReader::OpenFile:GetFileSize"); + + const Status s = + fs->GetFileSize(blob_file_path, IOOptions(), file_size, dbg); + if (!s.ok()) { + return s; + } + } + + if (*file_size < BlobLogHeader::kSize + BlobLogFooter::kSize) { + return Status::Corruption("Malformed blob file"); + } + + std::unique_ptr file; + + { + TEST_SYNC_POINT("BlobFileReader::OpenFile:NewRandomAccessFile"); + + const Status s = + fs->NewRandomAccessFile(blob_file_path, file_opts, &file, dbg); + if (!s.ok()) { + return s; + } + } + + assert(file); + + if (immutable_options.advise_random_on_open) { + file->Hint(FSRandomAccessFile::kRandom); + } + + file_reader->reset(new RandomAccessFileReader( + std::move(file), blob_file_path, immutable_options.clock, io_tracer, + immutable_options.stats, BLOB_DB_BLOB_FILE_READ_MICROS, + blob_file_read_hist, immutable_options.rate_limiter.get(), + immutable_options.listeners)); + + return Status::OK(); +} + +Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader, + uint32_t column_family_id, + Statistics* statistics, + CompressionType* compression_type) { + assert(file_reader); + assert(compression_type); + + Slice header_slice; + Buffer buf; + AlignedBuf aligned_buf; + + { + TEST_SYNC_POINT("BlobFileReader::ReadHeader:ReadFromFile"); + 
+ constexpr uint64_t read_offset = 0; + constexpr size_t read_size = BlobLogHeader::kSize; + + const Status s = + ReadFromFile(file_reader, read_offset, read_size, statistics, + &header_slice, &buf, &aligned_buf); + if (!s.ok()) { + return s; + } + + TEST_SYNC_POINT_CALLBACK("BlobFileReader::ReadHeader:TamperWithResult", + &header_slice); + } + + BlobLogHeader header; + + { + const Status s = header.DecodeFrom(header_slice); + if (!s.ok()) { + return s; + } + } + + constexpr ExpirationRange no_expiration_range; + + if (header.has_ttl || header.expiration_range != no_expiration_range) { + return Status::Corruption("Unexpected TTL blob file"); + } + + if (header.column_family_id != column_family_id) { + return Status::Corruption("Column family ID mismatch"); + } + + *compression_type = header.compression; + + return Status::OK(); +} + +Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader, + uint64_t file_size, Statistics* statistics) { + assert(file_size >= BlobLogHeader::kSize + BlobLogFooter::kSize); + assert(file_reader); + + Slice footer_slice; + Buffer buf; + AlignedBuf aligned_buf; + + { + TEST_SYNC_POINT("BlobFileReader::ReadFooter:ReadFromFile"); + + const uint64_t read_offset = file_size - BlobLogFooter::kSize; + constexpr size_t read_size = BlobLogFooter::kSize; + + const Status s = + ReadFromFile(file_reader, read_offset, read_size, statistics, + &footer_slice, &buf, &aligned_buf); + if (!s.ok()) { + return s; + } + + TEST_SYNC_POINT_CALLBACK("BlobFileReader::ReadFooter:TamperWithResult", + &footer_slice); + } + + BlobLogFooter footer; + + { + const Status s = footer.DecodeFrom(footer_slice); + if (!s.ok()) { + return s; + } + } + + constexpr ExpirationRange no_expiration_range; + + if (footer.expiration_range != no_expiration_range) { + return Status::Corruption("Unexpected TTL blob file"); + } + + return Status::OK(); +} + +Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader, + uint64_t read_offset, size_t read_size, + Statistics* statistics, Slice* slice, + Buffer* buf, AlignedBuf* aligned_buf) { + assert(slice); + assert(buf); + assert(aligned_buf); + + assert(file_reader); + + RecordTick(statistics, BLOB_DB_BLOB_FILE_BYTES_READ, read_size); + + Status s; + + if (file_reader->use_direct_io()) { + constexpr char* scratch = nullptr; + + s = file_reader->Read(IOOptions(), read_offset, read_size, slice, scratch, + aligned_buf); + } else { + buf->reset(new char[read_size]); + constexpr AlignedBuf* aligned_scratch = nullptr; + + s = file_reader->Read(IOOptions(), read_offset, read_size, slice, + buf->get(), aligned_scratch); + } + + if (!s.ok()) { + return s; + } + + if (slice->size() != read_size) { + return Status::Corruption("Failed to read data from blob file"); + } + + return Status::OK(); +} + +BlobFileReader::BlobFileReader( + std::unique_ptr&& file_reader, uint64_t file_size, + CompressionType compression_type, SystemClock* clock, + Statistics* statistics) + : file_reader_(std::move(file_reader)), + file_size_(file_size), + compression_type_(compression_type), + clock_(clock), + statistics_(statistics) { + assert(file_reader_); +} + +BlobFileReader::~BlobFileReader() = default; + +Status BlobFileReader::GetBlob(const ReadOptions& read_options, + const Slice& user_key, uint64_t offset, + uint64_t value_size, + CompressionType compression_type, + FilePrefetchBuffer* prefetch_buffer, + PinnableSlice* value, + uint64_t* bytes_read) const { + assert(value); + + const uint64_t key_size = user_key.size(); + + if 
(!IsValidBlobOffset(offset, key_size, value_size, file_size_)) { + return Status::Corruption("Invalid blob offset"); + } + + if (compression_type != compression_type_) { + return Status::Corruption("Compression type mismatch when reading blob"); + } + + // Note: if verify_checksum is set, we read the entire blob record to be able + // to perform the verification; otherwise, we just read the blob itself. Since + // the offset in BlobIndex actually points to the blob value, we need to make + // an adjustment in the former case. + const uint64_t adjustment = + read_options.verify_checksums + ? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) + : 0; + assert(offset >= adjustment); + + const uint64_t record_offset = offset - adjustment; + const uint64_t record_size = value_size + adjustment; + + Slice record_slice; + Buffer buf; + AlignedBuf aligned_buf; + + bool prefetched = false; + + if (prefetch_buffer) { + Status s; + constexpr bool for_compaction = true; + + prefetched = prefetch_buffer->TryReadFromCache( + IOOptions(), file_reader_.get(), record_offset, + static_cast(record_size), &record_slice, &s, for_compaction); + if (!s.ok()) { + return s; + } + } + + if (!prefetched) { + TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile"); + + const Status s = ReadFromFile(file_reader_.get(), record_offset, + static_cast(record_size), statistics_, + &record_slice, &buf, &aligned_buf); + if (!s.ok()) { + return s; + } + } + + TEST_SYNC_POINT_CALLBACK("BlobFileReader::GetBlob:TamperWithResult", + &record_slice); + + if (read_options.verify_checksums) { + const Status s = VerifyBlob(record_slice, user_key, value_size); + if (!s.ok()) { + return s; + } + } + + const Slice value_slice(record_slice.data() + adjustment, value_size); + + { + const Status s = UncompressBlobIfNeeded(value_slice, compression_type, + clock_, statistics_, value); + if (!s.ok()) { + return s; + } + } + + if (bytes_read) { + *bytes_read = record_size; + } + + return Status::OK(); +} + +void BlobFileReader::MultiGetBlob( + const ReadOptions& read_options, + const autovector>& user_keys, + const autovector& offsets, + const autovector& value_sizes, autovector& statuses, + autovector& values, uint64_t* bytes_read) const { + const size_t num_blobs = user_keys.size(); + assert(num_blobs > 0); + assert(num_blobs == offsets.size()); + assert(num_blobs == value_sizes.size()); + assert(num_blobs == statuses.size()); + assert(num_blobs == values.size()); + +#ifndef NDEBUG + for (size_t i = 0; i < offsets.size() - 1; ++i) { + assert(offsets[i] <= offsets[i + 1]); + } +#endif // !NDEBUG + + std::vector read_reqs(num_blobs); + autovector adjustments; + uint64_t total_len = 0; + for (size_t i = 0; i < num_blobs; ++i) { + const size_t key_size = user_keys[i].get().size(); + assert(IsValidBlobOffset(offsets[i], key_size, value_sizes[i], file_size_)); + const uint64_t adjustment = + read_options.verify_checksums + ? 
BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) + : 0; + assert(offsets[i] >= adjustment); + adjustments.push_back(adjustment); + read_reqs[i].offset = offsets[i] - adjustment; + read_reqs[i].len = value_sizes[i] + adjustment; + total_len += read_reqs[i].len; + } + + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, total_len); + + Buffer buf; + AlignedBuf aligned_buf; + + Status s; + bool direct_io = file_reader_->use_direct_io(); + if (direct_io) { + for (size_t i = 0; i < read_reqs.size(); ++i) { + read_reqs[i].scratch = nullptr; + } + } else { + buf.reset(new char[total_len]); + std::ptrdiff_t pos = 0; + for (size_t i = 0; i < read_reqs.size(); ++i) { + read_reqs[i].scratch = buf.get() + pos; + pos += read_reqs[i].len; + } + } + TEST_SYNC_POINT("BlobFileReader::MultiGetBlob:ReadFromFile"); + s = file_reader_->MultiRead(IOOptions(), read_reqs.data(), read_reqs.size(), + direct_io ? &aligned_buf : nullptr); + if (!s.ok()) { + for (auto& req : read_reqs) { + req.status.PermitUncheckedError(); + } + for (size_t i = 0; i < num_blobs; ++i) { + assert(statuses[i]); + *statuses[i] = s; + } + return; + } + + assert(s.ok()); + for (size_t i = 0; i < num_blobs; ++i) { + auto& req = read_reqs[i]; + assert(statuses[i]); + if (req.status.ok() && req.result.size() != req.len) { + req.status = IOStatus::Corruption("Failed to read data from blob file"); + } + *statuses[i] = req.status; + } + + if (read_options.verify_checksums) { + for (size_t i = 0; i < num_blobs; ++i) { + assert(statuses[i]); + if (!statuses[i]->ok()) { + continue; + } + const Slice& record_slice = read_reqs[i].result; + s = VerifyBlob(record_slice, user_keys[i], value_sizes[i]); + if (!s.ok()) { + assert(statuses[i]); + *statuses[i] = s; + } + } + } + + for (size_t i = 0; i < num_blobs; ++i) { + assert(statuses[i]); + if (!statuses[i]->ok()) { + continue; + } + const Slice& record_slice = read_reqs[i].result; + const Slice value_slice(record_slice.data() + adjustments[i], + value_sizes[i]); + s = UncompressBlobIfNeeded(value_slice, compression_type_, clock_, + statistics_, values[i]); + if (!s.ok()) { + *statuses[i] = s; + } + } + + if (bytes_read) { + uint64_t total_bytes = 0; + for (const auto& req : read_reqs) { + total_bytes += req.result.size(); + } + *bytes_read = total_bytes; + } +} + +Status BlobFileReader::VerifyBlob(const Slice& record_slice, + const Slice& user_key, uint64_t value_size) { + BlobLogRecord record; + + const Slice header_slice(record_slice.data(), BlobLogRecord::kHeaderSize); + + { + const Status s = record.DecodeHeaderFrom(header_slice); + if (!s.ok()) { + return s; + } + } + + if (record.key_size != user_key.size()) { + return Status::Corruption("Key size mismatch when reading blob"); + } + + if (record.value_size != value_size) { + return Status::Corruption("Value size mismatch when reading blob"); + } + + record.key = + Slice(record_slice.data() + BlobLogRecord::kHeaderSize, record.key_size); + if (record.key != user_key) { + return Status::Corruption("Key mismatch when reading blob"); + } + + record.value = Slice(record.key.data() + record.key_size, value_size); + + { + TEST_SYNC_POINT_CALLBACK("BlobFileReader::VerifyBlob:CheckBlobCRC", + &record); + + const Status s = record.CheckBlobCRC(); + if (!s.ok()) { + return s; + } + } + + return Status::OK(); +} + +Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice, + CompressionType compression_type, + SystemClock* clock, + Statistics* statistics, + PinnableSlice* value) { + assert(value); + + if (compression_type == 
kNoCompression) { + SaveValue(value_slice, value); + + return Status::OK(); + } + + UncompressionContext context(compression_type); + UncompressionInfo info(context, UncompressionDict::GetEmptyDict(), + compression_type); + + size_t uncompressed_size = 0; + constexpr uint32_t compression_format_version = 2; + constexpr MemoryAllocator* allocator = nullptr; + + CacheAllocationPtr output; + + { + StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS); + output = UncompressData(info, value_slice.data(), value_slice.size(), + &uncompressed_size, compression_format_version, + allocator); + } + + TEST_SYNC_POINT_CALLBACK( + "BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", &output); + + if (!output) { + return Status::Corruption("Unable to uncompress blob"); + } + + SaveValue(Slice(output.get(), uncompressed_size), value); + + return Status::OK(); +} + +void BlobFileReader::SaveValue(const Slice& src, PinnableSlice* dst) { + assert(dst); + + if (dst->IsPinned()) { + dst->Reset(); + } + + dst->PinSelf(src); +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_reader.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_reader.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,106 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include + +#include "file/random_access_file_reader.h" +#include "rocksdb/compression_type.h" +#include "rocksdb/rocksdb_namespace.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class Status; +struct ImmutableOptions; +struct FileOptions; +class HistogramImpl; +struct ReadOptions; +class Slice; +class FilePrefetchBuffer; +class PinnableSlice; +class Statistics; + +class BlobFileReader { + public: + static Status Create(const ImmutableOptions& immutable_options, + const FileOptions& file_options, + uint32_t column_family_id, + HistogramImpl* blob_file_read_hist, + uint64_t blob_file_number, + const std::shared_ptr& io_tracer, + std::unique_ptr* reader); + + BlobFileReader(const BlobFileReader&) = delete; + BlobFileReader& operator=(const BlobFileReader&) = delete; + + ~BlobFileReader(); + + Status GetBlob(const ReadOptions& read_options, const Slice& user_key, + uint64_t offset, uint64_t value_size, + CompressionType compression_type, + FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value, + uint64_t* bytes_read) const; + + // offsets must be sorted in ascending order by caller. 
+ void MultiGetBlob( + const ReadOptions& read_options, + const autovector>& user_keys, + const autovector& offsets, + const autovector& value_sizes, autovector& statuses, + autovector& values, uint64_t* bytes_read) const; + + CompressionType GetCompressionType() const { return compression_type_; } + + uint64_t GetFileSize() const { return file_size_; } + + private: + BlobFileReader(std::unique_ptr&& file_reader, + uint64_t file_size, CompressionType compression_type, + SystemClock* clock, Statistics* statistics); + + static Status OpenFile(const ImmutableOptions& immutable_options, + const FileOptions& file_opts, + HistogramImpl* blob_file_read_hist, + uint64_t blob_file_number, + const std::shared_ptr& io_tracer, + uint64_t* file_size, + std::unique_ptr* file_reader); + + static Status ReadHeader(const RandomAccessFileReader* file_reader, + uint32_t column_family_id, Statistics* statistics, + CompressionType* compression_type); + + static Status ReadFooter(const RandomAccessFileReader* file_reader, + uint64_t file_size, Statistics* statistics); + + using Buffer = std::unique_ptr; + + static Status ReadFromFile(const RandomAccessFileReader* file_reader, + uint64_t read_offset, size_t read_size, + Statistics* statistics, Slice* slice, Buffer* buf, + AlignedBuf* aligned_buf); + + static Status VerifyBlob(const Slice& record_slice, const Slice& user_key, + uint64_t value_size); + + static Status UncompressBlobIfNeeded(const Slice& value_slice, + CompressionType compression_type, + SystemClock* clock, + Statistics* statistics, + PinnableSlice* value); + + static void SaveValue(const Slice& src, PinnableSlice* dst); + + std::unique_ptr file_reader_; + uint64_t file_size_; + CompressionType compression_type_; + SystemClock* clock_; + Statistics* statistics_; +}; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,974 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_file_reader.h" + +#include +#include + +#include "db/blob/blob_log_format.h" +#include "db/blob/blob_log_writer.h" +#include "env/mock_env.h" +#include "file/filename.h" +#include "file/read_write_util.h" +#include "file/writable_file_writer.h" +#include "options/cf_options.h" +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/options.h" +#include "test_util/sync_point.h" +#include "test_util/testharness.h" +#include "util/compression.h" +#include "utilities/fault_injection_env.h" + +namespace ROCKSDB_NAMESPACE { + +namespace { + +// Creates a test blob file with `num` blobs in it. 
+void WriteBlobFile(const ImmutableOptions& immutable_options, + uint32_t column_family_id, bool has_ttl, + const ExpirationRange& expiration_range_header, + const ExpirationRange& expiration_range_footer, + uint64_t blob_file_number, const std::vector& keys, + const std::vector& blobs, CompressionType compression, + std::vector& blob_offsets, + std::vector& blob_sizes) { + assert(!immutable_options.cf_paths.empty()); + size_t num = keys.size(); + assert(num == blobs.size()); + assert(num == blob_offsets.size()); + assert(num == blob_sizes.size()); + + const std::string blob_file_path = + BlobFileName(immutable_options.cf_paths.front().path, blob_file_number); + std::unique_ptr file; + ASSERT_OK(NewWritableFile(immutable_options.fs.get(), blob_file_path, &file, + FileOptions())); + + std::unique_ptr file_writer(new WritableFileWriter( + std::move(file), blob_file_path, FileOptions(), immutable_options.clock)); + + constexpr Statistics* statistics = nullptr; + constexpr bool use_fsync = false; + constexpr bool do_flush = false; + + BlobLogWriter blob_log_writer(std::move(file_writer), immutable_options.clock, + statistics, blob_file_number, use_fsync, + do_flush); + + BlobLogHeader header(column_family_id, compression, has_ttl, + expiration_range_header); + + ASSERT_OK(blob_log_writer.WriteHeader(header)); + + std::vector compressed_blobs(num); + std::vector blobs_to_write(num); + if (kNoCompression == compression) { + for (size_t i = 0; i < num; ++i) { + blobs_to_write[i] = blobs[i]; + blob_sizes[i] = blobs[i].size(); + } + } else { + CompressionOptions opts; + CompressionContext context(compression); + constexpr uint64_t sample_for_compression = 0; + CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), + compression, sample_for_compression); + + constexpr uint32_t compression_format_version = 2; + + for (size_t i = 0; i < num; ++i) { + ASSERT_TRUE(CompressData(blobs[i], info, compression_format_version, + &compressed_blobs[i])); + blobs_to_write[i] = compressed_blobs[i]; + blob_sizes[i] = compressed_blobs[i].size(); + } + } + + for (size_t i = 0; i < num; ++i) { + uint64_t key_offset = 0; + ASSERT_OK(blob_log_writer.AddRecord(keys[i], blobs_to_write[i], &key_offset, + &blob_offsets[i])); + } + + BlobLogFooter footer; + footer.blob_count = num; + footer.expiration_range = expiration_range_footer; + + std::string checksum_method; + std::string checksum_value; + ASSERT_OK( + blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value)); +} + +// Creates a test blob file with a single blob in it. Note: this method +// makes it possible to test various corner cases by allowing the caller +// to specify the contents of various blob file header/footer fields. 
+void WriteBlobFile(const ImmutableOptions& immutable_options, + uint32_t column_family_id, bool has_ttl, + const ExpirationRange& expiration_range_header, + const ExpirationRange& expiration_range_footer, + uint64_t blob_file_number, const Slice& key, + const Slice& blob, CompressionType compression, + uint64_t* blob_offset, uint64_t* blob_size) { + std::vector keys{key}; + std::vector blobs{blob}; + std::vector blob_offsets{0}; + std::vector blob_sizes{0}; + WriteBlobFile(immutable_options, column_family_id, has_ttl, + expiration_range_header, expiration_range_footer, + blob_file_number, keys, blobs, compression, blob_offsets, + blob_sizes); + if (blob_offset) { + *blob_offset = blob_offsets[0]; + } + if (blob_size) { + *blob_size = blob_sizes[0]; + } +} + +} // anonymous namespace + +class BlobFileReaderTest : public testing::Test { + protected: + BlobFileReaderTest() { mock_env_.reset(MockEnv::Create(Env::Default())); } + std::unique_ptr mock_env_; +}; + +TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) { + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), + "BlobFileReaderTest_CreateReaderAndGetBlob"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr size_t num_blobs = 3; + const std::vector key_strs = {"key1", "key2", "key3"}; + const std::vector blob_strs = {"blob1", "blob2", "blob3"}; + + const std::vector keys = {key_strs[0], key_strs[1], key_strs[2]}; + const std::vector blobs = {blob_strs[0], blob_strs[1], blob_strs[2]}; + + std::vector blob_offsets(keys.size()); + std::vector blob_sizes(keys.size()); + + WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, keys, blobs, kNoCompression, + blob_offsets, blob_sizes); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_OK(BlobFileReader::Create( + immutable_options, FileOptions(), column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, &reader)); + + // Make sure the blob can be retrieved with and without checksum verification + ReadOptions read_options; + read_options.verify_checksums = false; + + constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; + + { + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_OK(reader->GetBlob(read_options, keys[0], blob_offsets[0], + blob_sizes[0], kNoCompression, prefetch_buffer, + &value, &bytes_read)); + ASSERT_EQ(value, blobs[0]); + ASSERT_EQ(bytes_read, blob_sizes[0]); + + // MultiGetBlob + bytes_read = 0; + size_t total_size = 0; + autovector> key_refs; + for (const auto& key_ref : keys) { + key_refs.emplace_back(std::cref(key_ref)); + } + autovector offsets{blob_offsets[0], blob_offsets[1], + blob_offsets[2]}; + autovector sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]}; + std::array statuses_buf; + autovector statuses{&statuses_buf[0], &statuses_buf[1], + &statuses_buf[2]}; + std::array value_buf; + autovector values{&value_buf[0], &value_buf[1], + &value_buf[2]}; + reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses, + values, &bytes_read); + for (size_t i = 0; i < num_blobs; ++i) { + ASSERT_OK(statuses_buf[i]); + ASSERT_EQ(value_buf[i], blobs[i]); + total_size += blob_sizes[i]; + } + ASSERT_EQ(bytes_read, total_size); + } + + 
read_options.verify_checksums = true; + + { + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_OK(reader->GetBlob(read_options, keys[1], blob_offsets[1], + blob_sizes[1], kNoCompression, prefetch_buffer, + &value, &bytes_read)); + ASSERT_EQ(value, blobs[1]); + + const uint64_t key_size = keys[1].size(); + ASSERT_EQ(bytes_read, + BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) + + blob_sizes[1]); + } + + // Invalid offset (too close to start of file) + { + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(read_options, keys[0], blob_offsets[0] - 1, + blob_sizes[0], kNoCompression, prefetch_buffer, + &value, &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + } + + // Invalid offset (too close to end of file) + { + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(read_options, keys[2], blob_offsets[2] + 1, + blob_sizes[2], kNoCompression, prefetch_buffer, + &value, &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + } + + // Incorrect compression type + { + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(read_options, keys[0], blob_offsets[0], + blob_sizes[0], kZSTD, prefetch_buffer, &value, + &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + } + + // Incorrect key size + { + constexpr char shorter_key[] = "k"; + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(read_options, shorter_key, + blob_offsets[0] - + (keys[0].size() - sizeof(shorter_key) + 1), + blob_sizes[0], kNoCompression, prefetch_buffer, + &value, &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + + // MultiGetBlob + autovector> key_refs; + for (const auto& key_ref : keys) { + key_refs.emplace_back(std::cref(key_ref)); + } + Slice shorter_key_slice(shorter_key, sizeof(shorter_key) - 1); + key_refs[1] = std::cref(shorter_key_slice); + + autovector offsets{ + blob_offsets[0], + blob_offsets[1] - (keys[1].size() - key_refs[1].get().size()), + blob_offsets[2]}; + autovector sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]}; + std::array statuses_buf; + autovector statuses{&statuses_buf[0], &statuses_buf[1], + &statuses_buf[2]}; + std::array value_buf; + autovector values{&value_buf[0], &value_buf[1], + &value_buf[2]}; + reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses, + values, &bytes_read); + for (size_t i = 0; i < num_blobs; ++i) { + if (i == 1) { + ASSERT_TRUE(statuses_buf[i].IsCorruption()); + } else { + ASSERT_OK(statuses_buf[i]); + } + } + } + + // Incorrect key + { + constexpr char incorrect_key[] = "foo1"; + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(read_options, incorrect_key, blob_offsets[0], + blob_sizes[0], kNoCompression, prefetch_buffer, + &value, &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + + // MultiGetBlob + autovector> key_refs; + for (const auto& key_ref : keys) { + key_refs.emplace_back(std::cref(key_ref)); + } + Slice wrong_key_slice(incorrect_key, sizeof(incorrect_key) - 1); + key_refs[2] = std::cref(wrong_key_slice); + + autovector offsets{blob_offsets[0], blob_offsets[1], + blob_offsets[2]}; + autovector sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]}; + std::array statuses_buf; + autovector statuses{&statuses_buf[0], &statuses_buf[1], + &statuses_buf[2]}; + std::array value_buf; + autovector values{&value_buf[0], &value_buf[1], + &value_buf[2]}; + reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses, + 
values, &bytes_read); + for (size_t i = 0; i < num_blobs; ++i) { + if (i == num_blobs - 1) { + ASSERT_TRUE(statuses_buf[i].IsCorruption()); + } else { + ASSERT_OK(statuses_buf[i]); + } + } + } + + // Incorrect value size + { + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(read_options, keys[1], blob_offsets[1], + blob_sizes[1] + 1, kNoCompression, + prefetch_buffer, &value, &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + + // MultiGetBlob + autovector> key_refs; + for (const auto& key_ref : keys) { + key_refs.emplace_back(std::cref(key_ref)); + } + autovector offsets{blob_offsets[0], blob_offsets[1], + blob_offsets[2]}; + autovector sizes{blob_sizes[0], blob_sizes[1] + 1, blob_sizes[2]}; + std::array statuses_buf; + autovector statuses{&statuses_buf[0], &statuses_buf[1], + &statuses_buf[2]}; + std::array value_buf; + autovector values{&value_buf[0], &value_buf[1], + &value_buf[2]}; + reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses, + values, &bytes_read); + for (size_t i = 0; i < num_blobs; ++i) { + if (i != 1) { + ASSERT_OK(statuses_buf[i]); + } else { + ASSERT_TRUE(statuses_buf[i].IsCorruption()); + } + } + } +} + +TEST_F(BlobFileReaderTest, Malformed) { + // Write a blob file consisting of nothing but a header, and make sure we + // detect the error when we open it for reading + + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_Malformed"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr uint64_t blob_file_number = 1; + + { + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + + const std::string blob_file_path = + BlobFileName(immutable_options.cf_paths.front().path, blob_file_number); + + std::unique_ptr file; + ASSERT_OK(NewWritableFile(immutable_options.fs.get(), blob_file_path, &file, + FileOptions())); + + std::unique_ptr file_writer( + new WritableFileWriter(std::move(file), blob_file_path, FileOptions(), + immutable_options.clock)); + + constexpr Statistics* statistics = nullptr; + constexpr bool use_fsync = false; + constexpr bool do_flush = false; + + BlobLogWriter blob_log_writer(std::move(file_writer), + immutable_options.clock, statistics, + blob_file_number, use_fsync, do_flush); + + BlobLogHeader header(column_family_id, kNoCompression, has_ttl, + expiration_range); + + ASSERT_OK(blob_log_writer.WriteHeader(header)); + } + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(), + column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, + &reader) + .IsCorruption()); +} + +TEST_F(BlobFileReaderTest, TTL) { + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_TTL"), 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = true; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, key, blob, 
kNoCompression, + &blob_offset, &blob_size); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(), + column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, + &reader) + .IsCorruption()); +} + +TEST_F(BlobFileReaderTest, ExpirationRangeInHeader) { + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), + "BlobFileReaderTest_ExpirationRangeInHeader"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + const ExpirationRange expiration_range_header( + 1, 2); // can be made constexpr when we adopt C++14 + constexpr ExpirationRange expiration_range_footer; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, + expiration_range_header, expiration_range_footer, + blob_file_number, key, blob, kNoCompression, &blob_offset, + &blob_size); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(), + column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, + &reader) + .IsCorruption()); +} + +TEST_F(BlobFileReaderTest, ExpirationRangeInFooter) { + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), + "BlobFileReaderTest_ExpirationRangeInFooter"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range_header; + const ExpirationRange expiration_range_footer( + 1, 2); // can be made constexpr when we adopt C++14 + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, + expiration_range_header, expiration_range_footer, + blob_file_number, key, blob, kNoCompression, &blob_offset, + &blob_size); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(), + column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, + &reader) + .IsCorruption()); +} + +TEST_F(BlobFileReaderTest, IncorrectColumnFamily) { + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), + "BlobFileReaderTest_IncorrectColumnFamily"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, key, blob, kNoCompression, + &blob_offset, &blob_size); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; 
+ + std::unique_ptr reader; + + constexpr uint32_t incorrect_column_family_id = 2; + + ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(), + incorrect_column_family_id, + blob_file_read_hist, blob_file_number, + nullptr /*IOTracer*/, &reader) + .IsCorruption()); +} + +TEST_F(BlobFileReaderTest, BlobCRCError) { + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_BlobCRCError"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, key, blob, kNoCompression, + &blob_offset, &blob_size); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_OK(BlobFileReader::Create( + immutable_options, FileOptions(), column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, &reader)); + + SyncPoint::GetInstance()->SetCallBack( + "BlobFileReader::VerifyBlob:CheckBlobCRC", [](void* arg) { + BlobLogRecord* const record = static_cast(arg); + assert(record); + + record->blob_crc = 0xfaceb00c; + }); + + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(ReadOptions(), key, blob_offset, blob_size, + kNoCompression, prefetch_buffer, &value, + &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(BlobFileReaderTest, Compression) { + if (!Snappy_Supported()) { + return; + } + + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_Compression"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, key, blob, + kSnappyCompression, &blob_offset, &blob_size); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_OK(BlobFileReader::Create( + immutable_options, FileOptions(), column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, &reader)); + + // Make sure the blob can be retrieved with and without checksum verification + ReadOptions read_options; + read_options.verify_checksums = false; + + constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; + + { + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size, + kSnappyCompression, prefetch_buffer, &value, + &bytes_read)); + ASSERT_EQ(value, blob); + ASSERT_EQ(bytes_read, blob_size); + } + + read_options.verify_checksums = true; + + { + 
PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size, + kSnappyCompression, prefetch_buffer, &value, + &bytes_read)); + ASSERT_EQ(value, blob); + + constexpr uint64_t key_size = sizeof(key) - 1; + ASSERT_EQ(bytes_read, + BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) + + blob_size); + } +} + +TEST_F(BlobFileReaderTest, UncompressionError) { + if (!Snappy_Supported()) { + return; + } + + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), + "BlobFileReaderTest_UncompressionError"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, key, blob, + kSnappyCompression, &blob_offset, &blob_size); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + ASSERT_OK(BlobFileReader::Create( + immutable_options, FileOptions(), column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, &reader)); + + SyncPoint::GetInstance()->SetCallBack( + "BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", [](void* arg) { + CacheAllocationPtr* const output = + static_cast(arg); + assert(output); + + output->reset(); + }); + + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(ReadOptions(), key, blob_offset, blob_size, + kSnappyCompression, prefetch_buffer, &value, + &bytes_read) + .IsCorruption()); + ASSERT_EQ(bytes_read, 0); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +class BlobFileReaderIOErrorTest + : public testing::Test, + public testing::WithParamInterface { + protected: + BlobFileReaderIOErrorTest() : sync_point_(GetParam()) { + mock_env_.reset(MockEnv::Create(Env::Default())); + fault_injection_env_.reset(new FaultInjectionTestEnv(mock_env_.get())); + } + + std::unique_ptr mock_env_; + std::unique_ptr fault_injection_env_; + std::string sync_point_; +}; + +INSTANTIATE_TEST_CASE_P(BlobFileReaderTest, BlobFileReaderIOErrorTest, + ::testing::ValuesIn(std::vector{ + "BlobFileReader::OpenFile:GetFileSize", + "BlobFileReader::OpenFile:NewRandomAccessFile", + "BlobFileReader::ReadHeader:ReadFromFile", + "BlobFileReader::ReadFooter:ReadFromFile", + "BlobFileReader::GetBlob:ReadFromFile"})); + +TEST_P(BlobFileReaderIOErrorTest, IOError) { + // Simulates an I/O error during the specified step + + Options options; + options.env = fault_injection_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(fault_injection_env_.get(), + "BlobFileReaderIOErrorTest_IOError"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + 
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, key, blob, kNoCompression, + &blob_offset, &blob_size); + + SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) { + fault_injection_env_->SetFilesystemActive(false, + Status::IOError(sync_point_)); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + const Status s = BlobFileReader::Create( + immutable_options, FileOptions(), column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, &reader); + + const bool fail_during_create = + (sync_point_ != "BlobFileReader::GetBlob:ReadFromFile"); + + if (fail_during_create) { + ASSERT_TRUE(s.IsIOError()); + } else { + ASSERT_OK(s); + + constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(ReadOptions(), key, blob_offset, blob_size, + kNoCompression, prefetch_buffer, &value, + &bytes_read) + .IsIOError()); + ASSERT_EQ(bytes_read, 0); + } + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +class BlobFileReaderDecodingErrorTest + : public testing::Test, + public testing::WithParamInterface { + protected: + BlobFileReaderDecodingErrorTest() : sync_point_(GetParam()) { + mock_env_.reset(MockEnv::Create(Env::Default())); + } + + std::unique_ptr mock_env_; + std::string sync_point_; +}; + +INSTANTIATE_TEST_CASE_P(BlobFileReaderTest, BlobFileReaderDecodingErrorTest, + ::testing::ValuesIn(std::vector{ + "BlobFileReader::ReadHeader:TamperWithResult", + "BlobFileReader::ReadFooter:TamperWithResult", + "BlobFileReader::GetBlob:TamperWithResult"})); + +TEST_P(BlobFileReaderDecodingErrorTest, DecodingError) { + Options options; + options.env = mock_env_.get(); + options.cf_paths.emplace_back( + test::PerThreadDBPath(mock_env_.get(), + "BlobFileReaderDecodingErrorTest_DecodingError"), + 0); + options.enable_blob_files = true; + + ImmutableOptions immutable_options(options); + + constexpr uint32_t column_family_id = 1; + constexpr bool has_ttl = false; + constexpr ExpirationRange expiration_range; + constexpr uint64_t blob_file_number = 1; + constexpr char key[] = "key"; + constexpr char blob[] = "blob"; + + uint64_t blob_offset = 0; + uint64_t blob_size = 0; + + WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range, + expiration_range, blob_file_number, key, blob, kNoCompression, + &blob_offset, &blob_size); + + SyncPoint::GetInstance()->SetCallBack(sync_point_, [](void* arg) { + Slice* const slice = static_cast(arg); + assert(slice); + assert(!slice->empty()); + + slice->remove_prefix(1); + }); + + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr HistogramImpl* blob_file_read_hist = nullptr; + + std::unique_ptr reader; + + const Status s = BlobFileReader::Create( + immutable_options, FileOptions(), column_family_id, blob_file_read_hist, + blob_file_number, nullptr /*IOTracer*/, &reader); + + const bool fail_during_create = + sync_point_ != "BlobFileReader::GetBlob:TamperWithResult"; + + if (fail_during_create) { + ASSERT_TRUE(s.IsCorruption()); + } else { + ASSERT_OK(s); + + constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; + PinnableSlice value; + uint64_t bytes_read = 0; + + ASSERT_TRUE(reader + ->GetBlob(ReadOptions(), key, blob_offset, blob_size, + kNoCompression, prefetch_buffer, &value, + &bytes_read) + .IsCorruption()); + 
ASSERT_EQ(bytes_read, 0); + } + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,100 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_garbage_meter.h" + +#include "db/blob/blob_index.h" +#include "db/blob/blob_log_format.h" +#include "db/dbformat.h" + +namespace ROCKSDB_NAMESPACE { + +Status BlobGarbageMeter::ProcessInFlow(const Slice& key, const Slice& value) { + uint64_t blob_file_number = kInvalidBlobFileNumber; + uint64_t bytes = 0; + + const Status s = Parse(key, value, &blob_file_number, &bytes); + if (!s.ok()) { + return s; + } + + if (blob_file_number == kInvalidBlobFileNumber) { + return Status::OK(); + } + + flows_[blob_file_number].AddInFlow(bytes); + + return Status::OK(); +} + +Status BlobGarbageMeter::ProcessOutFlow(const Slice& key, const Slice& value) { + uint64_t blob_file_number = kInvalidBlobFileNumber; + uint64_t bytes = 0; + + const Status s = Parse(key, value, &blob_file_number, &bytes); + if (!s.ok()) { + return s; + } + + if (blob_file_number == kInvalidBlobFileNumber) { + return Status::OK(); + } + + // Note: in order to measure the amount of additional garbage, we only need to + // track the outflow for preexisting files, i.e. those that also had inflow. + // (Newly written files would only have outflow.) 
+  auto it = flows_.find(blob_file_number);
+  if (it == flows_.end()) {
+    return Status::OK();
+  }
+
+  it->second.AddOutFlow(bytes);
+
+  return Status::OK();
+}
+
+Status BlobGarbageMeter::Parse(const Slice& key, const Slice& value,
+                               uint64_t* blob_file_number, uint64_t* bytes) {
+  assert(blob_file_number);
+  assert(*blob_file_number == kInvalidBlobFileNumber);
+  assert(bytes);
+  assert(*bytes == 0);
+
+  ParsedInternalKey ikey;
+
+  {
+    constexpr bool log_err_key = false;
+    const Status s = ParseInternalKey(key, &ikey, log_err_key);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  if (ikey.type != kTypeBlobIndex) {
+    return Status::OK();
+  }
+
+  BlobIndex blob_index;
+
+  {
+    const Status s = blob_index.DecodeFrom(value);
+    if (!s.ok()) {
+      return s;
+    }
+  }
+
+  if (blob_index.IsInlined() || blob_index.HasTTL()) {
+    return Status::Corruption("Unexpected TTL/inlined blob index");
+  }
+
+  *blob_file_number = blob_index.file_number();
+  *bytes =
+      blob_index.size() +
+      BlobLogRecord::CalculateAdjustmentForRecordHeader(ikey.user_key.size());
+
+  return Status::OK();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,102 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <unordered_map>
+
+#include "db/blob/blob_constants.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+
+// A class that can be used to compute the amount of additional garbage
+// generated by a compaction. It parses the keys and blob references in the
+// input and output of a compaction, and aggregates the "inflow" and "outflow"
+// on a per-blob file basis. The amount of additional garbage for any given
+// blob file can then be computed by subtracting the outflow from the inflow.
+class BlobGarbageMeter {
+ public:
+  // A class to store the number and total size of blobs on a per-blob file
+  // basis.
+  class BlobStats {
+   public:
+    void Add(uint64_t bytes) {
+      ++count_;
+      bytes_ += bytes;
+    }
+    void Add(uint64_t count, uint64_t bytes) {
+      count_ += count;
+      bytes_ += bytes;
+    }
+
+    uint64_t GetCount() const { return count_; }
+    uint64_t GetBytes() const { return bytes_; }
+
+   private:
+    uint64_t count_ = 0;
+    uint64_t bytes_ = 0;
+  };
+
+  // A class to keep track of the "inflow" and the "outflow" and to compute the
+  // amount of additional garbage for a given blob file.
+ class BlobInOutFlow { + public: + void AddInFlow(uint64_t bytes) { + in_flow_.Add(bytes); + assert(IsValid()); + } + void AddOutFlow(uint64_t bytes) { + out_flow_.Add(bytes); + assert(IsValid()); + } + + const BlobStats& GetInFlow() const { return in_flow_; } + const BlobStats& GetOutFlow() const { return out_flow_; } + + bool IsValid() const { + return in_flow_.GetCount() >= out_flow_.GetCount() && + in_flow_.GetBytes() >= out_flow_.GetBytes(); + } + bool HasGarbage() const { + assert(IsValid()); + return in_flow_.GetCount() > out_flow_.GetCount(); + } + uint64_t GetGarbageCount() const { + assert(IsValid()); + assert(HasGarbage()); + return in_flow_.GetCount() - out_flow_.GetCount(); + } + uint64_t GetGarbageBytes() const { + assert(IsValid()); + assert(HasGarbage()); + return in_flow_.GetBytes() - out_flow_.GetBytes(); + } + + private: + BlobStats in_flow_; + BlobStats out_flow_; + }; + + Status ProcessInFlow(const Slice& key, const Slice& value); + Status ProcessOutFlow(const Slice& key, const Slice& value); + + const std::unordered_map& flows() const { + return flows_; + } + + private: + static Status Parse(const Slice& key, const Slice& value, + uint64_t* blob_file_number, uint64_t* bytes); + + std::unordered_map flows_; +}; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,196 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/blob_garbage_meter.h" + +#include +#include + +#include "db/blob/blob_index.h" +#include "db/blob/blob_log_format.h" +#include "db/dbformat.h" +#include "test_util/testharness.h" + +namespace ROCKSDB_NAMESPACE { + +TEST(BlobGarbageMeterTest, MeasureGarbage) { + BlobGarbageMeter blob_garbage_meter; + + struct BlobDescriptor { + std::string user_key; + uint64_t blob_file_number; + uint64_t offset; + uint64_t size; + CompressionType compression_type; + bool has_in_flow; + bool has_out_flow; + + uint64_t GetExpectedBytes() const { + return size + + BlobLogRecord::CalculateAdjustmentForRecordHeader(user_key.size()); + } + }; + + // Note: blob file 4 has the same inflow and outflow and hence no additional + // garbage. Blob file 5 has less outflow than inflow and thus it does have + // additional garbage. Blob file 6 is a newly written file (i.e. no inflow, + // only outflow) and is thus not tracked by the meter. 
+ std::vector blobs{ + {"key", 4, 1234, 555, kLZ4Compression, true, true}, + {"other_key", 4, 6789, 101010, kLZ4Compression, true, true}, + {"yet_another_key", 5, 22222, 3456, kLZ4Compression, true, true}, + {"foo_key", 5, 77777, 8888, kLZ4Compression, true, true}, + {"bar_key", 5, 999999, 1212, kLZ4Compression, true, false}, + {"baz_key", 5, 1234567, 890, kLZ4Compression, true, false}, + {"new_key", 6, 7777, 9999, kNoCompression, false, true}}; + + for (const auto& blob : blobs) { + constexpr SequenceNumber seq = 123; + const InternalKey key(blob.user_key, seq, kTypeBlobIndex); + const Slice key_slice = key.Encode(); + + std::string value; + BlobIndex::EncodeBlob(&value, blob.blob_file_number, blob.offset, blob.size, + blob.compression_type); + const Slice value_slice(value); + + if (blob.has_in_flow) { + ASSERT_OK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice)); + } + if (blob.has_out_flow) { + ASSERT_OK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice)); + } + } + + const auto& flows = blob_garbage_meter.flows(); + ASSERT_EQ(flows.size(), 2); + + { + const auto it = flows.find(4); + ASSERT_NE(it, flows.end()); + + const auto& flow = it->second; + + constexpr uint64_t expected_count = 2; + const uint64_t expected_bytes = + blobs[0].GetExpectedBytes() + blobs[1].GetExpectedBytes(); + + const auto& in = flow.GetInFlow(); + ASSERT_EQ(in.GetCount(), expected_count); + ASSERT_EQ(in.GetBytes(), expected_bytes); + + const auto& out = flow.GetOutFlow(); + ASSERT_EQ(out.GetCount(), expected_count); + ASSERT_EQ(out.GetBytes(), expected_bytes); + + ASSERT_TRUE(flow.IsValid()); + ASSERT_FALSE(flow.HasGarbage()); + } + + { + const auto it = flows.find(5); + ASSERT_NE(it, flows.end()); + + const auto& flow = it->second; + + const auto& in = flow.GetInFlow(); + + constexpr uint64_t expected_in_count = 4; + const uint64_t expected_in_bytes = + blobs[2].GetExpectedBytes() + blobs[3].GetExpectedBytes() + + blobs[4].GetExpectedBytes() + blobs[5].GetExpectedBytes(); + + ASSERT_EQ(in.GetCount(), expected_in_count); + ASSERT_EQ(in.GetBytes(), expected_in_bytes); + + const auto& out = flow.GetOutFlow(); + + constexpr uint64_t expected_out_count = 2; + const uint64_t expected_out_bytes = + blobs[2].GetExpectedBytes() + blobs[3].GetExpectedBytes(); + + ASSERT_EQ(out.GetCount(), expected_out_count); + ASSERT_EQ(out.GetBytes(), expected_out_bytes); + + ASSERT_TRUE(flow.IsValid()); + ASSERT_TRUE(flow.HasGarbage()); + ASSERT_EQ(flow.GetGarbageCount(), expected_in_count - expected_out_count); + ASSERT_EQ(flow.GetGarbageBytes(), expected_in_bytes - expected_out_bytes); + } +} + +TEST(BlobGarbageMeterTest, PlainValue) { + constexpr char user_key[] = "user_key"; + constexpr SequenceNumber seq = 123; + + const InternalKey key(user_key, seq, kTypeValue); + const Slice key_slice = key.Encode(); + + constexpr char value[] = "value"; + const Slice value_slice(value); + + BlobGarbageMeter blob_garbage_meter; + + ASSERT_OK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice)); + ASSERT_OK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice)); + ASSERT_TRUE(blob_garbage_meter.flows().empty()); +} + +TEST(BlobGarbageMeterTest, CorruptInternalKey) { + constexpr char corrupt_key[] = "i_am_corrupt"; + const Slice key_slice(corrupt_key); + + constexpr char value[] = "value"; + const Slice value_slice(value); + + BlobGarbageMeter blob_garbage_meter; + + ASSERT_NOK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice)); + ASSERT_NOK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice)); +} + 
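The MeasureGarbage test above exercises the meter exactly the way a compaction would: every input key/value goes through ProcessInFlow(), every surviving output through ProcessOutFlow(), and the per-file garbage is read back from flows(). The following sketch (an editorial illustration, not part of the patch) shows that consumption pattern end to end; the inputs/outputs vectors and the ReportAdditionalGarbage function are hypothetical stand-ins for a compaction's record streams.

    // Sketch only: how a caller might consume BlobGarbageMeter. The vectors
    // of internal-key/value pairs are hypothetical; in RocksDB the records
    // would come from the compaction iterator.
    #include <cstdio>
    #include <utility>
    #include <vector>

    #include "db/blob/blob_garbage_meter.h"
    #include "rocksdb/slice.h"

    namespace ROCKSDB_NAMESPACE {

    Status ReportAdditionalGarbage(
        const std::vector<std::pair<Slice, Slice>>& inputs,
        const std::vector<std::pair<Slice, Slice>>& outputs) {
      BlobGarbageMeter meter;
      for (const auto& kv : inputs) {
        const Status s = meter.ProcessInFlow(kv.first, kv.second);
        if (!s.ok()) {
          return s;
        }
      }
      for (const auto& kv : outputs) {
        const Status s = meter.ProcessOutFlow(kv.first, kv.second);
        if (!s.ok()) {
          return s;
        }
      }
      // Whatever flowed in but not back out has become garbage.
      for (const auto& entry : meter.flows()) {
        const auto& flow = entry.second;
        if (flow.HasGarbage()) {
          std::printf("blob file %llu: %llu blobs, %llu bytes of new garbage\n",
                      static_cast<unsigned long long>(entry.first),
                      static_cast<unsigned long long>(flow.GetGarbageCount()),
                      static_cast<unsigned long long>(flow.GetGarbageBytes()));
        }
      }
      return Status::OK();
    }

    }  // namespace ROCKSDB_NAMESPACE
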
+TEST(BlobGarbageMeterTest, CorruptBlobIndex) {
+  constexpr char user_key[] = "user_key";
+  constexpr SequenceNumber seq = 123;
+
+  const InternalKey key(user_key, seq, kTypeBlobIndex);
+  const Slice key_slice = key.Encode();
+
+  constexpr char value[] = "i_am_not_a_blob_index";
+  const Slice value_slice(value);
+
+  BlobGarbageMeter blob_garbage_meter;
+
+  ASSERT_NOK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice));
+  ASSERT_NOK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice));
+}
+
+TEST(BlobGarbageMeterTest, InlinedTTLBlobIndex) {
+  constexpr char user_key[] = "user_key";
+  constexpr SequenceNumber seq = 123;
+
+  const InternalKey key(user_key, seq, kTypeBlobIndex);
+  const Slice key_slice = key.Encode();
+
+  constexpr uint64_t expiration = 1234567890;
+  constexpr char inlined_value[] = "inlined";
+
+  std::string value;
+  BlobIndex::EncodeInlinedTTL(&value, expiration, inlined_value);
+
+  const Slice value_slice(value);
+
+  BlobGarbageMeter blob_garbage_meter;
+
+  ASSERT_NOK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice));
+  ASSERT_NOK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice));
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_index.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_index.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_index.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_index.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,187 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include <sstream>
+#include <string>
+
+#include "rocksdb/compression_type.h"
+#include "util/coding.h"
+#include "util/compression.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// BlobIndex is a pointer to the blob and metadata of the blob. The index is
+// stored in base DB as ValueType::kTypeBlobIndex.
+// There are three types of blob index:
+//
+//    kInlinedTTL:
+//      +------+------------+---------------+
+//      | type | expiration |     value     |
+//      +------+------------+---------------+
+//      | char |  varint64  | variable size |
+//      +------+------------+---------------+
+//
+//    kBlob:
+//      +------+-------------+----------+----------+-------------+
+//      | type | file number |  offset  |   size   | compression |
+//      +------+-------------+----------+----------+-------------+
+//      | char |  varint64   | varint64 | varint64 |    char     |
+//      +------+-------------+----------+----------+-------------+
+//
+//    kBlobTTL:
+//      +------+------------+-------------+----------+----------+-------------+
+//      | type | expiration | file number |  offset  |   size   | compression |
+//      +------+------------+-------------+----------+----------+-------------+
+//      | char |  varint64  |  varint64   | varint64 | varint64 |    char     |
+//      +------+------------+-------------+----------+----------+-------------+
+//
+// There isn't a kInlined (without TTL) type since we can store it as a plain
+// value (i.e. ValueType::kTypeValue).
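To make the kBlob layout concrete before the class definition that follows, here is a small round-trip sketch (editorial, not part of the patch) built on the EncodeBlob()/DecodeFrom() pair declared below. The file number, offset and size are made-up values, and the snippet assumes the internal header db/blob/blob_index.h is on the include path; these are internal APIs, so this is illustrative rather than supported usage.

    // Sketch only: encode a kBlob index (1-byte type tag, three varint64
    // fields, then a 1-byte compression tag) and decode it again.
    #include <cassert>
    #include <string>

    #include "db/blob/blob_index.h"

    int main() {
      using ROCKSDB_NAMESPACE::BlobIndex;

      std::string encoded;
      BlobIndex::EncodeBlob(&encoded, /*file_number=*/4, /*offset=*/1234,
                            /*size=*/555, ROCKSDB_NAMESPACE::kNoCompression);

      BlobIndex decoded;
      assert(decoded.DecodeFrom(encoded).ok());
      assert(!decoded.IsInlined() && !decoded.HasTTL());
      assert(decoded.file_number() == 4 && decoded.offset() == 1234 &&
             decoded.size() == 555);
      return 0;
    }
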
+class BlobIndex { + public: + enum class Type : unsigned char { + kInlinedTTL = 0, + kBlob = 1, + kBlobTTL = 2, + kUnknown = 3, + }; + + BlobIndex() : type_(Type::kUnknown) {} + + BlobIndex(const BlobIndex&) = default; + BlobIndex& operator=(const BlobIndex&) = default; + + bool IsInlined() const { return type_ == Type::kInlinedTTL; } + + bool HasTTL() const { + return type_ == Type::kInlinedTTL || type_ == Type::kBlobTTL; + } + + uint64_t expiration() const { + assert(HasTTL()); + return expiration_; + } + + const Slice& value() const { + assert(IsInlined()); + return value_; + } + + uint64_t file_number() const { + assert(!IsInlined()); + return file_number_; + } + + uint64_t offset() const { + assert(!IsInlined()); + return offset_; + } + + uint64_t size() const { + assert(!IsInlined()); + return size_; + } + + CompressionType compression() const { + assert(!IsInlined()); + return compression_; + } + + Status DecodeFrom(Slice slice) { + static const std::string kErrorMessage = "Error while decoding blob index"; + assert(slice.size() > 0); + type_ = static_cast(*slice.data()); + if (type_ >= Type::kUnknown) { + return Status::Corruption( + kErrorMessage, + "Unknown blob index type: " + ToString(static_cast(type_))); + } + slice = Slice(slice.data() + 1, slice.size() - 1); + if (HasTTL()) { + if (!GetVarint64(&slice, &expiration_)) { + return Status::Corruption(kErrorMessage, "Corrupted expiration"); + } + } + if (IsInlined()) { + value_ = slice; + } else { + if (GetVarint64(&slice, &file_number_) && GetVarint64(&slice, &offset_) && + GetVarint64(&slice, &size_) && slice.size() == 1) { + compression_ = static_cast(*slice.data()); + } else { + return Status::Corruption(kErrorMessage, "Corrupted blob offset"); + } + } + return Status::OK(); + } + + std::string DebugString(bool output_hex) const { + std::ostringstream oss; + + if (IsInlined()) { + oss << "[inlined blob] value:" << value_.ToString(output_hex); + } else { + oss << "[blob ref] file:" << file_number_ << " offset:" << offset_ + << " size:" << size_ + << " compression: " << CompressionTypeToString(compression_); + } + + if (HasTTL()) { + oss << " exp:" << expiration_; + } + + return oss.str(); + } + + static void EncodeInlinedTTL(std::string* dst, uint64_t expiration, + const Slice& value) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(1 + kMaxVarint64Length + value.size()); + dst->push_back(static_cast(Type::kInlinedTTL)); + PutVarint64(dst, expiration); + dst->append(value.data(), value.size()); + } + + static void EncodeBlob(std::string* dst, uint64_t file_number, + uint64_t offset, uint64_t size, + CompressionType compression) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(kMaxVarint64Length * 3 + 2); + dst->push_back(static_cast(Type::kBlob)); + PutVarint64(dst, file_number); + PutVarint64(dst, offset); + PutVarint64(dst, size); + dst->push_back(static_cast(compression)); + } + + static void EncodeBlobTTL(std::string* dst, uint64_t expiration, + uint64_t file_number, uint64_t offset, + uint64_t size, CompressionType compression) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(kMaxVarint64Length * 4 + 2); + dst->push_back(static_cast(Type::kBlobTTL)); + PutVarint64(dst, expiration); + PutVarint64(dst, file_number); + PutVarint64(dst, offset); + PutVarint64(dst, size); + dst->push_back(static_cast(compression)); + } + + private: + Type type_ = Type::kUnknown; + uint64_t expiration_ = 0; + Slice value_; + uint64_t file_number_ = 0; + uint64_t offset_ = 0; + uint64_t size_ = 0; + 
CompressionType compression_ = kNoCompression; +}; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_format.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_format.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_format.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_format.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,145 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "db/blob/blob_log_format.h" + +#include "util/coding.h" +#include "util/crc32c.h" + +namespace ROCKSDB_NAMESPACE { + +void BlobLogHeader::EncodeTo(std::string* dst) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(BlobLogHeader::kSize); + PutFixed32(dst, kMagicNumber); + PutFixed32(dst, version); + PutFixed32(dst, column_family_id); + unsigned char flags = (has_ttl ? 1 : 0); + dst->push_back(flags); + dst->push_back(compression); + PutFixed64(dst, expiration_range.first); + PutFixed64(dst, expiration_range.second); +} + +Status BlobLogHeader::DecodeFrom(Slice src) { + static const std::string kErrorMessage = + "Error while decoding blob log header"; + if (src.size() != BlobLogHeader::kSize) { + return Status::Corruption(kErrorMessage, + "Unexpected blob file header size"); + } + uint32_t magic_number; + unsigned char flags; + if (!GetFixed32(&src, &magic_number) || !GetFixed32(&src, &version) || + !GetFixed32(&src, &column_family_id)) { + return Status::Corruption( + kErrorMessage, + "Error decoding magic number, version and column family id"); + } + if (magic_number != kMagicNumber) { + return Status::Corruption(kErrorMessage, "Magic number mismatch"); + } + if (version != kVersion1) { + return Status::Corruption(kErrorMessage, "Unknown header version"); + } + flags = src.data()[0]; + compression = static_cast(src.data()[1]); + has_ttl = (flags & 1) == 1; + src.remove_prefix(2); + if (!GetFixed64(&src, &expiration_range.first) || + !GetFixed64(&src, &expiration_range.second)) { + return Status::Corruption(kErrorMessage, "Error decoding expiration range"); + } + return Status::OK(); +} + +void BlobLogFooter::EncodeTo(std::string* dst) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(BlobLogFooter::kSize); + PutFixed32(dst, kMagicNumber); + PutFixed64(dst, blob_count); + PutFixed64(dst, expiration_range.first); + PutFixed64(dst, expiration_range.second); + crc = crc32c::Value(dst->c_str(), dst->size()); + crc = crc32c::Mask(crc); + PutFixed32(dst, crc); +} + +Status BlobLogFooter::DecodeFrom(Slice src) { + static const std::string kErrorMessage = + "Error while decoding blob log footer"; + if (src.size() != BlobLogFooter::kSize) { + return Status::Corruption(kErrorMessage, + "Unexpected blob file footer size"); + } + uint32_t src_crc = 0; + src_crc = crc32c::Value(src.data(), BlobLogFooter::kSize - sizeof(uint32_t)); + src_crc = crc32c::Mask(src_crc); + uint32_t magic_number = 0; + if (!GetFixed32(&src, &magic_number) || !GetFixed64(&src, &blob_count) || + !GetFixed64(&src, &expiration_range.first) || + !GetFixed64(&src, &expiration_range.second) || !GetFixed32(&src, &crc)) { + return Status::Corruption(kErrorMessage, "Error decoding content"); + } + if (magic_number != kMagicNumber) { + return Status::Corruption(kErrorMessage, "Magic number 
mismatch"); + } + if (src_crc != crc) { + return Status::Corruption(kErrorMessage, "CRC mismatch"); + } + return Status::OK(); +} + +void BlobLogRecord::EncodeHeaderTo(std::string* dst) { + assert(dst != nullptr); + dst->clear(); + dst->reserve(BlobLogRecord::kHeaderSize + key.size() + value.size()); + PutFixed64(dst, key.size()); + PutFixed64(dst, value.size()); + PutFixed64(dst, expiration); + header_crc = crc32c::Value(dst->c_str(), dst->size()); + header_crc = crc32c::Mask(header_crc); + PutFixed32(dst, header_crc); + blob_crc = crc32c::Value(key.data(), key.size()); + blob_crc = crc32c::Extend(blob_crc, value.data(), value.size()); + blob_crc = crc32c::Mask(blob_crc); + PutFixed32(dst, blob_crc); +} + +Status BlobLogRecord::DecodeHeaderFrom(Slice src) { + static const std::string kErrorMessage = "Error while decoding blob record"; + if (src.size() != BlobLogRecord::kHeaderSize) { + return Status::Corruption(kErrorMessage, + "Unexpected blob record header size"); + } + uint32_t src_crc = 0; + src_crc = crc32c::Value(src.data(), BlobLogRecord::kHeaderSize - 8); + src_crc = crc32c::Mask(src_crc); + if (!GetFixed64(&src, &key_size) || !GetFixed64(&src, &value_size) || + !GetFixed64(&src, &expiration) || !GetFixed32(&src, &header_crc) || + !GetFixed32(&src, &blob_crc)) { + return Status::Corruption(kErrorMessage, "Error decoding content"); + } + if (src_crc != header_crc) { + return Status::Corruption(kErrorMessage, "Header CRC mismatch"); + } + return Status::OK(); +} + +Status BlobLogRecord::CheckBlobCRC() const { + uint32_t expected_crc = 0; + expected_crc = crc32c::Value(key.data(), key.size()); + expected_crc = crc32c::Extend(expected_crc, value.data(), value.size()); + expected_crc = crc32c::Mask(expected_crc); + if (expected_crc != blob_crc) { + return Status::Corruption("Blob CRC mismatch"); + } + return Status::OK(); +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_format.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_format.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_format.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_format.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,149 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Log format information shared by reader and writer. + +#pragma once + +#include +#include + +#include "rocksdb/options.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +constexpr uint32_t kMagicNumber = 2395959; // 0x00248f37 +constexpr uint32_t kVersion1 = 1; + +using ExpirationRange = std::pair; + +// Format of blob log file header (30 bytes): +// +// +--------------+---------+---------+-------+-------------+-------------------+ +// | magic number | version | cf id | flags | compression | expiration range | +// +--------------+---------+---------+-------+-------------+-------------------+ +// | Fixed32 | Fixed32 | Fixed32 | char | char | Fixed64 Fixed64 | +// +--------------+---------+---------+-------+-------------+-------------------+ +// +// List of flags: +// has_ttl: Whether the file contain TTL data. +// +// Expiration range in the header is a rough range based on +// blob_db_options.ttl_range_secs. 
+struct BlobLogHeader {
+  static constexpr size_t kSize = 30;
+
+  BlobLogHeader() = default;
+  BlobLogHeader(uint32_t _column_family_id, CompressionType _compression,
+                bool _has_ttl, const ExpirationRange& _expiration_range)
+      : column_family_id(_column_family_id),
+        compression(_compression),
+        has_ttl(_has_ttl),
+        expiration_range(_expiration_range) {}
+
+  uint32_t version = kVersion1;
+  uint32_t column_family_id = 0;
+  CompressionType compression = kNoCompression;
+  bool has_ttl = false;
+  ExpirationRange expiration_range;
+
+  void EncodeTo(std::string* dst);
+
+  Status DecodeFrom(Slice slice);
+};
+
+// Format of blob log file footer (32 bytes):
+//
+//    +--------------+------------+-------------------+------------+
+//    | magic number | blob count | expiration range  | footer CRC |
+//    +--------------+------------+-------------------+------------+
+//    |   Fixed32    |  Fixed64   | Fixed64 + Fixed64 |  Fixed32   |
+//    +--------------+------------+-------------------+------------+
+//
+// The footer will be presented only when the blob file is properly closed.
+//
+// Unlike the same field in the file header, expiration range in the footer is
+// the range of smallest and largest expiration of the data in this file.
+struct BlobLogFooter {
+  static constexpr size_t kSize = 32;
+
+  uint64_t blob_count = 0;
+  ExpirationRange expiration_range = std::make_pair(0, 0);
+  uint32_t crc = 0;
+
+  void EncodeTo(std::string* dst);
+
+  Status DecodeFrom(Slice slice);
+};
+
+// Blob record format (32 bytes header + key + value):
+//
+//    +------------+--------------+------------+------------+----------+---------+-----------+
+//    | key length | value length | expiration | header CRC | blob CRC |   key   |   value   |
+//    +------------+--------------+------------+------------+----------+---------+-----------+
+//    |  Fixed64   |   Fixed64    |  Fixed64   |  Fixed32   | Fixed32  | key len | value len |
+//    +------------+--------------+------------+------------+----------+---------+-----------+
+//
+// If the file has has_ttl = false, the expiration field is always 0, and the
+// blob doesn't have an expiration.
+//
+// Also note that if compression is used, the value is the compressed value and
+// the value length is the compressed value length.
+//
+// Header CRC is the checksum of (key_len + val_len + expiration), while
+// blob CRC is the checksum of (key + value).
+//
+// We could use variable length encoding (Varint64) to save more space, but it
+// would make the reader more complicated.
+struct BlobLogRecord {
+  // header includes fields up to blob CRC
+  static constexpr size_t kHeaderSize = 32;
+
+  // Note that the offset field of BlobIndex actually points to the blob value
+  // as opposed to the start of the blob record. The following method can
+  // be used to calculate the adjustment needed to read the blob record header.
+  static constexpr uint64_t CalculateAdjustmentForRecordHeader(
+      uint64_t key_size) {
+    return key_size + kHeaderSize;
+  }
+
+  uint64_t key_size = 0;
+  uint64_t value_size = 0;
+  uint64_t expiration = 0;
+  uint32_t header_crc = 0;
+  uint32_t blob_crc = 0;
+  Slice key;
+  Slice value;
+  std::unique_ptr<char[]> key_buf;
+  std::unique_ptr<char[]> value_buf;
+
+  uint64_t record_size() const { return kHeaderSize + key_size + value_size; }
+
+  void EncodeHeaderTo(std::string* dst);
+
+  Status DecodeHeaderFrom(Slice src);
+
+  Status CheckBlobCRC() const;
+};
+
+// Checks whether a blob offset is potentially valid or not.
+inline bool IsValidBlobOffset(uint64_t value_offset, uint64_t key_size, + uint64_t value_size, uint64_t file_size) { + if (value_offset < + BlobLogHeader::kSize + BlobLogRecord::kHeaderSize + key_size) { + return false; + } + + if (value_offset + value_size + BlobLogFooter::kSize > file_size) { + return false; + } + + return true; +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,132 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "db/blob/blob_log_sequential_reader.h" + +#include "file/random_access_file_reader.h" +#include "monitoring/statistics.h" +#include "util/stop_watch.h" + +namespace ROCKSDB_NAMESPACE { + +BlobLogSequentialReader::BlobLogSequentialReader( + std::unique_ptr&& file_reader, SystemClock* clock, + Statistics* statistics) + : file_(std::move(file_reader)), + clock_(clock), + statistics_(statistics), + next_byte_(0) {} + +BlobLogSequentialReader::~BlobLogSequentialReader() = default; + +Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice, + char* buf) { + assert(slice); + assert(file_); + + StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); + Status s = file_->Read(IOOptions(), next_byte_, static_cast(size), + slice, buf, nullptr); + next_byte_ += size; + if (!s.ok()) { + return s; + } + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, slice->size()); + if (slice->size() != size) { + return Status::Corruption("EOF reached while reading record"); + } + return s; +} + +Status BlobLogSequentialReader::ReadHeader(BlobLogHeader* header) { + assert(header); + assert(next_byte_ == 0); + + static_assert(BlobLogHeader::kSize <= sizeof(header_buf_), + "Buffer is smaller than BlobLogHeader::kSize"); + + Status s = ReadSlice(BlobLogHeader::kSize, &buffer_, header_buf_); + if (!s.ok()) { + return s; + } + + if (buffer_.size() != BlobLogHeader::kSize) { + return Status::Corruption("EOF reached before file header"); + } + + return header->DecodeFrom(buffer_); +} + +Status BlobLogSequentialReader::ReadRecord(BlobLogRecord* record, + ReadLevel level, + uint64_t* blob_offset) { + assert(record); + static_assert(BlobLogRecord::kHeaderSize <= sizeof(header_buf_), + "Buffer is smaller than BlobLogRecord::kHeaderSize"); + + Status s = ReadSlice(BlobLogRecord::kHeaderSize, &buffer_, header_buf_); + if (!s.ok()) { + return s; + } + if (buffer_.size() != BlobLogRecord::kHeaderSize) { + return Status::Corruption("EOF reached before record header"); + } + + s = record->DecodeHeaderFrom(buffer_); + if (!s.ok()) { + return s; + } + + uint64_t kb_size = record->key_size + record->value_size; + if (blob_offset != nullptr) { + *blob_offset = next_byte_ + record->key_size; + } + + switch (level) { + case kReadHeader: + next_byte_ += kb_size; + break; + + case kReadHeaderKey: + record->key_buf.reset(new char[record->key_size]); + s = ReadSlice(record->key_size, &record->key, record->key_buf.get()); + next_byte_ += record->value_size; + 
break; + + case kReadHeaderKeyBlob: + record->key_buf.reset(new char[record->key_size]); + s = ReadSlice(record->key_size, &record->key, record->key_buf.get()); + if (s.ok()) { + record->value_buf.reset(new char[record->value_size]); + s = ReadSlice(record->value_size, &record->value, + record->value_buf.get()); + } + if (s.ok()) { + s = record->CheckBlobCRC(); + } + break; + } + return s; +} + +Status BlobLogSequentialReader::ReadFooter(BlobLogFooter* footer) { + assert(footer); + static_assert(BlobLogFooter::kSize <= sizeof(header_buf_), + "Buffer is smaller than BlobLogFooter::kSize"); + + Status s = ReadSlice(BlobLogFooter::kSize, &buffer_, header_buf_); + if (!s.ok()) { + return s; + } + + if (buffer_.size() != BlobLogFooter::kSize) { + return Status::Corruption("EOF reached before file footer"); + } + + return footer->DecodeFrom(buffer_); +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,83 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#pragma once + +#include + +#include "db/blob/blob_log_format.h" +#include "rocksdb/slice.h" + +#define MAX_HEADER_SIZE(a, b, c) (a > b ? (a > c ? a : c) : (b > c ? b : c)) + +namespace ROCKSDB_NAMESPACE { + +class RandomAccessFileReader; +class Env; +class Statistics; +class Status; +class SystemClock; + +/** + * BlobLogSequentialReader is a general purpose log stream reader + * implementation. The actual job of reading from the device is implemented by + * the RandomAccessFileReader interface. + * + * Please see BlobLogWriter for details on the file and record layout. + */ + +class BlobLogSequentialReader { + public: + enum ReadLevel { + kReadHeader, + kReadHeaderKey, + kReadHeaderKeyBlob, + }; + + // Create a reader that will return log records from "*file_reader". + BlobLogSequentialReader(std::unique_ptr&& file_reader, + SystemClock* clock, Statistics* statistics); + + // No copying allowed + BlobLogSequentialReader(const BlobLogSequentialReader&) = delete; + BlobLogSequentialReader& operator=(const BlobLogSequentialReader&) = delete; + + ~BlobLogSequentialReader(); + + Status ReadHeader(BlobLogHeader* header); + + // Read the next record into *record. Returns true if read + // successfully, false if we hit end of the input. The contents filled in + // *record will only be valid until the next mutating operation on this + // reader. + // If blob_offset is non-null, return offset of the blob through it. 
+  Status ReadRecord(BlobLogRecord* record, ReadLevel level = kReadHeader,
+                    uint64_t* blob_offset = nullptr);
+
+  Status ReadFooter(BlobLogFooter* footer);
+
+  void ResetNextByte() { next_byte_ = 0; }
+
+  uint64_t GetNextByte() const { return next_byte_; }
+
+ private:
+  Status ReadSlice(uint64_t size, Slice* slice, char* buf);
+
+  const std::unique_ptr<RandomAccessFileReader> file_;
+  SystemClock* clock_;
+
+  Statistics* statistics_;
+
+  Slice buffer_;
+  char header_buf_[MAX_HEADER_SIZE(BlobLogHeader::kSize, BlobLogFooter::kSize,
+                                   BlobLogRecord::kHeaderSize)];
+
+  // which byte to read next
+  uint64_t next_byte_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#undef MAX_HEADER_SIZE
\ No newline at end of file
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_writer.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_writer.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_writer.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_writer.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,172 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_log_writer.h"
+
+#include <cstdint>
+#include <string>
+
+#include "db/blob/blob_log_format.h"
+#include "file/writable_file_writer.h"
+#include "monitoring/statistics.h"
+#include "rocksdb/system_clock.h"
+#include "test_util/sync_point.h"
+#include "util/coding.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+BlobLogWriter::BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest,
+                             SystemClock* clock, Statistics* statistics,
+                             uint64_t log_number, bool use_fs, bool do_flush,
+                             uint64_t boffset)
+    : dest_(std::move(dest)),
+      clock_(clock),
+      statistics_(statistics),
+      log_number_(log_number),
+      block_offset_(boffset),
+      use_fsync_(use_fs),
+      do_flush_(do_flush),
+      last_elem_type_(kEtNone) {}
+
+BlobLogWriter::~BlobLogWriter() = default;
+
+Status BlobLogWriter::Sync() {
+  TEST_SYNC_POINT("BlobLogWriter::Sync");
+
+  StopWatch sync_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS);
+  Status s = dest_->Sync(use_fsync_);
+  RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED);
+  return s;
+}
+
+Status BlobLogWriter::WriteHeader(BlobLogHeader& header) {
+  assert(block_offset_ == 0);
+  assert(last_elem_type_ == kEtNone);
+  std::string str;
+  header.EncodeTo(&str);
+
+  Status s = dest_->Append(Slice(str));
+  if (s.ok()) {
+    block_offset_ += str.size();
+    if (do_flush_) {
+      s = dest_->Flush();
+    }
+  }
+  last_elem_type_ = kEtFileHdr;
+  RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
+             BlobLogHeader::kSize);
+  return s;
+}
+
+Status BlobLogWriter::AppendFooter(BlobLogFooter& footer,
+                                   std::string* checksum_method,
+                                   std::string* checksum_value) {
+  assert(block_offset_ != 0);
+  assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord);
+
+  std::string str;
+  footer.EncodeTo(&str);
+
+  Status s = dest_->Append(Slice(str));
+  if (s.ok()) {
+    block_offset_ += str.size();
+
+    s = Sync();
+
+    if (s.ok()) {
+      s = dest_->Close();
+
+      if (s.ok()) {
+        assert(!!checksum_method == !!checksum_value);
+
+        if (checksum_method) {
+          assert(checksum_method->empty());
+
+          std::string method = dest_->GetFileChecksumFuncName();
+          if (method != kUnknownFileChecksumFuncName) {
+            *checksum_method = std::move(method);
+          }
+        }
+        if (checksum_value) {
+          assert(checksum_value->empty());
+ + std::string value = dest_->GetFileChecksum(); + if (value != kUnknownFileChecksum) { + *checksum_value = std::move(value); + } + } + } + } + + dest_.reset(); + } + + last_elem_type_ = kEtFileFooter; + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, + BlobLogFooter::kSize); + return s; +} + +Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, + uint64_t expiration, uint64_t* key_offset, + uint64_t* blob_offset) { + assert(block_offset_ != 0); + assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); + + std::string buf; + ConstructBlobHeader(&buf, key, val, expiration); + + Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); + return s; +} + +Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, + uint64_t* key_offset, uint64_t* blob_offset) { + assert(block_offset_ != 0); + assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); + + std::string buf; + ConstructBlobHeader(&buf, key, val, 0); + + Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); + return s; +} + +void BlobLogWriter::ConstructBlobHeader(std::string* buf, const Slice& key, + const Slice& val, uint64_t expiration) { + BlobLogRecord record; + record.key = key; + record.value = val; + record.expiration = expiration; + record.EncodeHeaderTo(buf); +} + +Status BlobLogWriter::EmitPhysicalRecord(const std::string& headerbuf, + const Slice& key, const Slice& val, + uint64_t* key_offset, + uint64_t* blob_offset) { + StopWatch write_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS); + Status s = dest_->Append(Slice(headerbuf)); + if (s.ok()) { + s = dest_->Append(key); + } + if (s.ok()) { + s = dest_->Append(val); + } + if (do_flush_ && s.ok()) { + s = dest_->Flush(); + } + + *key_offset = block_offset_ + BlobLogRecord::kHeaderSize; + *blob_offset = *key_offset + key.size(); + block_offset_ = *blob_offset + val.size(); + last_elem_type_ = kEtRecord; + RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN, + BlobLogRecord::kHeaderSize + key.size() + val.size()); + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_writer.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_writer.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/blob_log_writer.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/blob_log_writer.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,83 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +#pragma once + +#include +#include +#include + +#include "db/blob/blob_log_format.h" +#include "rocksdb/slice.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" +#include "rocksdb/types.h" + +namespace ROCKSDB_NAMESPACE { + +class WritableFileWriter; +class SystemClock; +/** + * BlobLogWriter is the blob log stream writer. It provides an append-only + * abstraction for writing blob data. + * + * + * Look at blob_db_format.h to see the details of the record formats. + */ + +class BlobLogWriter { + public: + // Create a writer that will append data to "*dest". + // "*dest" must be initially empty. + // "*dest" must remain live while this BlobLogWriter is in use. 
+ BlobLogWriter(std::unique_ptr&& dest, SystemClock* clock, + Statistics* statistics, uint64_t log_number, bool use_fsync, + bool do_flush, uint64_t boffset = 0); + // No copying allowed + BlobLogWriter(const BlobLogWriter&) = delete; + BlobLogWriter& operator=(const BlobLogWriter&) = delete; + + ~BlobLogWriter(); + + static void ConstructBlobHeader(std::string* buf, const Slice& key, + const Slice& val, uint64_t expiration); + + Status AddRecord(const Slice& key, const Slice& val, uint64_t* key_offset, + uint64_t* blob_offset); + + Status AddRecord(const Slice& key, const Slice& val, uint64_t expiration, + uint64_t* key_offset, uint64_t* blob_offset); + + Status EmitPhysicalRecord(const std::string& headerbuf, const Slice& key, + const Slice& val, uint64_t* key_offset, + uint64_t* blob_offset); + + Status AppendFooter(BlobLogFooter& footer, std::string* checksum_method, + std::string* checksum_value); + + Status WriteHeader(BlobLogHeader& header); + + WritableFileWriter* file() { return dest_.get(); } + + const WritableFileWriter* file() const { return dest_.get(); } + + uint64_t get_log_number() const { return log_number_; } + + Status Sync(); + + private: + std::unique_ptr dest_; + SystemClock* clock_; + Statistics* statistics_; + uint64_t log_number_; + uint64_t block_offset_; // Current offset in block + bool use_fsync_; + bool do_flush_; + + public: + enum ElemType { kEtNone, kEtFileHdr, kEtRecord, kEtFileFooter }; + ElemType last_elem_type_; +}; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,1026 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include +#include + +#include "db/blob/blob_index.h" +#include "db/blob/blob_log_format.h" +#include "db/db_test_util.h" +#include "port/stack_trace.h" +#include "test_util/sync_point.h" +#include "utilities/fault_injection_env.h" + +namespace ROCKSDB_NAMESPACE { + +class DBBlobBasicTest : public DBTestBase { + protected: + DBBlobBasicTest() + : DBTestBase("db_blob_basic_test", /* env_do_fsync */ false) {} +}; + +TEST_F(DBBlobBasicTest, GetBlob) { + Options options = GetDefaultOptions(); + options.enable_blob_files = true; + options.min_blob_size = 0; + + Reopen(options); + + constexpr char key[] = "key"; + constexpr char blob_value[] = "blob_value"; + + ASSERT_OK(Put(key, blob_value)); + + ASSERT_OK(Flush()); + + ASSERT_EQ(Get(key), blob_value); + + // Try again with no I/O allowed. The table and the necessary blocks should + // already be in their respective caches; however, the blob itself can only be + // read from the blob file, so the read should return Incomplete. 
+ ReadOptions read_options; + read_options.read_tier = kBlockCacheTier; + + PinnableSlice result; + ASSERT_TRUE(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result) + .IsIncomplete()); +} + +TEST_F(DBBlobBasicTest, MultiGetBlobs) { + constexpr size_t min_blob_size = 6; + + Options options = GetDefaultOptions(); + options.enable_blob_files = true; + options.min_blob_size = min_blob_size; + + Reopen(options); + + // Put then retrieve three key-values. The first value is below the size limit + // and is thus stored inline; the other two are stored separately as blobs. + constexpr size_t num_keys = 3; + + constexpr char first_key[] = "first_key"; + constexpr char first_value[] = "short"; + static_assert(sizeof(first_value) - 1 < min_blob_size, + "first_value too long to be inlined"); + + ASSERT_OK(Put(first_key, first_value)); + + constexpr char second_key[] = "second_key"; + constexpr char second_value[] = "long_value"; + static_assert(sizeof(second_value) - 1 >= min_blob_size, + "second_value too short to be stored as blob"); + + ASSERT_OK(Put(second_key, second_value)); + + constexpr char third_key[] = "third_key"; + constexpr char third_value[] = "other_long_value"; + static_assert(sizeof(third_value) - 1 >= min_blob_size, + "third_value too short to be stored as blob"); + + ASSERT_OK(Put(third_key, third_value)); + + ASSERT_OK(Flush()); + + ReadOptions read_options; + + std::array keys{{first_key, second_key, third_key}}; + + { + std::array values; + std::array statuses; + + db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys, &keys[0], + &values[0], &statuses[0]); + + ASSERT_OK(statuses[0]); + ASSERT_EQ(values[0], first_value); + + ASSERT_OK(statuses[1]); + ASSERT_EQ(values[1], second_value); + + ASSERT_OK(statuses[2]); + ASSERT_EQ(values[2], third_value); + } + + // Try again with no I/O allowed. The table and the necessary blocks should + // already be in their respective caches. The first (inlined) value should be + // successfully read; however, the two blob values could only be read from the + // blob file, so for those the read should return Incomplete. + read_options.read_tier = kBlockCacheTier; + + { + std::array values; + std::array statuses; + + db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys, &keys[0], + &values[0], &statuses[0]); + + ASSERT_OK(statuses[0]); + ASSERT_EQ(values[0], first_value); + + ASSERT_TRUE(statuses[1].IsIncomplete()); + + ASSERT_TRUE(statuses[2].IsIncomplete()); + } +} + +#ifndef ROCKSDB_LITE +TEST_F(DBBlobBasicTest, MultiGetWithDirectIO) { + Options options = GetDefaultOptions(); + + // First, create an external SST file ["b"]. + const std::string file_path = dbname_ + "/test.sst"; + { + SstFileWriter sst_file_writer(EnvOptions(), GetDefaultOptions()); + Status s = sst_file_writer.Open(file_path); + ASSERT_OK(s); + ASSERT_OK(sst_file_writer.Put("b", "b_value")); + ASSERT_OK(sst_file_writer.Finish()); + } + + options.enable_blob_files = true; + options.min_blob_size = 1000; + options.use_direct_reads = true; + options.allow_ingest_behind = true; + + // Open DB with fixed-prefix sst-partitioner so that compaction will cut + // new table file when encountering a new key whose 1-byte prefix changes. 
+ constexpr size_t key_len = 1; + options.sst_partitioner_factory = + NewSstPartitionerFixedPrefixFactory(key_len); + + Status s = TryReopen(options); + if (s.IsInvalidArgument()) { + ROCKSDB_GTEST_SKIP("This test requires direct IO support"); + return; + } + ASSERT_OK(s); + + constexpr size_t num_keys = 3; + constexpr size_t blob_size = 3000; + + constexpr char first_key[] = "a"; + const std::string first_blob(blob_size, 'a'); + ASSERT_OK(Put(first_key, first_blob)); + + constexpr char second_key[] = "b"; + const std::string second_blob(2 * blob_size, 'b'); + ASSERT_OK(Put(second_key, second_blob)); + + constexpr char third_key[] = "d"; + const std::string third_blob(blob_size, 'd'); + ASSERT_OK(Put(third_key, third_blob)); + + // first_blob, second_blob and third_blob in the same blob file. + // SST Blob file + // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'| + // | | | ^ ^ ^ + // | | | | | | + // | | +---------|-------|--------+ + // | +-----------------|-------+ + // +-------------------------+ + ASSERT_OK(Flush()); + + constexpr char fourth_key[] = "c"; + const std::string fourth_blob(blob_size, 'c'); + ASSERT_OK(Put(fourth_key, fourth_blob)); + // fourth_blob in another blob file. + // SST Blob file SST Blob file + // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'| ["c"] |'cccc'| + // | | | ^ ^ ^ | ^ + // | | | | | | | | + // | | +---------|-------|--------+ +-------+ + // | +-----------------|-------+ + // +-------------------------+ + ASSERT_OK(Flush()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr, + /*end=*/nullptr)); + + // Due to the above sst partitioner, we get 4 L1 files. The blob files are + // unchanged. + // |'aaaa', 'bbbb', 'dddd'| |'cccc'| + // ^ ^ ^ ^ + // | | | | + // L0 | | | | + // L1 ["a"] ["b"] ["c"] | | ["d"] | + // | | | | | | + // | | +---------|-------|---------------+ + // | +-----------------|-------+ + // +-------------------------+ + ASSERT_EQ(4, NumTableFilesAtLevel(/*level=*/1)); + + { + // Ingest the external SST file into bottommost level. + std::vector ext_files{file_path}; + IngestExternalFileOptions opts; + opts.ingest_behind = true; + ASSERT_OK( + db_->IngestExternalFile(db_->DefaultColumnFamily(), ext_files, opts)); + } + + // Now the database becomes as follows. + // |'aaaa', 'bbbb', 'dddd'| |'cccc'| + // ^ ^ ^ ^ + // | | | | + // L0 | | | | + // L1 ["a"] ["b"] ["c"] | | ["d"] | + // | | | | | | + // | | +---------|-------|---------------+ + // | +-----------------|-------+ + // +-------------------------+ + // + // L6 ["b"] + + { + // Compact ["b"] to bottommost level. 
+ Slice begin = Slice(second_key); + Slice end = Slice(second_key); + CompactRangeOptions cro; + cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; + ASSERT_OK(db_->CompactRange(cro, &begin, &end)); + } + + // |'aaaa', 'bbbb', 'dddd'| |'cccc'| + // ^ ^ ^ ^ + // | | | | + // L0 | | | | + // L1 ["a"] ["c"] | | ["d"] | + // | | | | | + // | +---------|-------|---------------+ + // | +-----------------|-------+ + // +-------|-----------------+ + // | + // L6 ["b"] + ASSERT_EQ(3, NumTableFilesAtLevel(/*level=*/1)); + ASSERT_EQ(1, NumTableFilesAtLevel(/*level=*/6)); + + bool called = false; + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->SetCallBack( + "RandomAccessFileReader::MultiRead:AlignedReqs", [&](void* arg) { + auto* aligned_reqs = static_cast*>(arg); + assert(aligned_reqs); + ASSERT_EQ(1, aligned_reqs->size()); + called = true; + }); + SyncPoint::GetInstance()->EnableProcessing(); + + std::array keys{{first_key, third_key, second_key}}; + + { + std::array values; + std::array statuses; + + // The MultiGet(), when constructing the KeyContexts, will process the keys + // in such order: a, d, b. The reason is that ["a"] and ["d"] are in L1, + // while ["b"] resides in L6. + // Consequently, the original FSReadRequest list prepared by + // Version::MultiGetblob() will be for "a", "d" and "b". It is unsorted as + // follows: + // + // ["a", offset=30, len=3033], + // ["d", offset=9096, len=3033], + // ["b", offset=3063, len=6033] + // + // If we do not sort them before calling MultiRead() in DirectIO, then the + // underlying IO merging logic will yield two requests. + // + // [offset=0, len=4096] (for "a") + // [offset=0, len=12288] (result of merging the request for "d" and "b") + // + // We need to sort them in Version::MultiGetBlob() so that the underlying + // IO merging logic in DirectIO mode works as expected. 
+
+    db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys, &keys[0],
+                  &values[0], &statuses[0]);
+
+    SyncPoint::GetInstance()->DisableProcessing();
+    SyncPoint::GetInstance()->ClearAllCallBacks();
+
+    ASSERT_TRUE(called);
+
+    ASSERT_OK(statuses[0]);
+    ASSERT_EQ(values[0], first_blob);
+
+    ASSERT_OK(statuses[1]);
+    ASSERT_EQ(values[1], third_blob);
+
+    ASSERT_OK(statuses[2]);
+    ASSERT_EQ(values[2], second_blob);
+  }
+}
+#endif  // !ROCKSDB_LITE
+
+TEST_F(DBBlobBasicTest, MultiGetBlobsFromMultipleFiles) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr size_t kNumBlobFiles = 3;
+  constexpr size_t kNumBlobsPerFile = 3;
+  constexpr size_t kNumKeys = kNumBlobsPerFile * kNumBlobFiles;
+
+  std::vector<std::string> key_strs;
+  std::vector<std::string> value_strs;
+  for (size_t i = 0; i < kNumBlobFiles; ++i) {
+    for (size_t j = 0; j < kNumBlobsPerFile; ++j) {
+      std::string key = "key" + std::to_string(i) + "_" + std::to_string(j);
+      std::string value =
+          "value_as_blob" + std::to_string(i) + "_" + std::to_string(j);
+      ASSERT_OK(Put(key, value));
+      key_strs.push_back(key);
+      value_strs.push_back(value);
+    }
+    ASSERT_OK(Flush());
+  }
+  assert(key_strs.size() == kNumKeys);
+  std::array<Slice, kNumKeys> keys;
+  for (size_t i = 0; i < keys.size(); ++i) {
+    keys[i] = key_strs[i];
+  }
+  std::array<PinnableSlice, kNumKeys> values;
+  std::array<Status, kNumKeys> statuses;
+  db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), kNumKeys, &keys[0],
+                &values[0], &statuses[0]);
+
+  for (size_t i = 0; i < kNumKeys; ++i) {
+    ASSERT_OK(statuses[i]);
+    ASSERT_EQ(value_strs[i], values[i]);
+  }
+}
+
+TEST_F(DBBlobBasicTest, GetBlob_CorruptIndex) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr char key[] = "key";
+
+  // Fake a corrupt blob index.
+  const std::string blob_index("foobar");
+
+  WriteBatch batch;
+  ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
+  ASSERT_OK(db_->Write(WriteOptions(), &batch));
+
+  ASSERT_OK(Flush());
+
+  PinnableSlice result;
+  ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
+                  .IsCorruption());
+}
+
+TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.create_if_missing = true;
+
+  DestroyAndReopen(options);
+
+  constexpr size_t kNumOfKeys = 3;
+  std::array<std::string, kNumOfKeys> key_strs;
+  std::array<std::string, kNumOfKeys> value_strs;
+  std::array<Slice, kNumOfKeys + 1> keys;
+  for (size_t i = 0; i < kNumOfKeys; ++i) {
+    key_strs[i] = "foo" + std::to_string(i);
+    value_strs[i] = "blob_value" + std::to_string(i);
+    ASSERT_OK(Put(key_strs[i], value_strs[i]));
+    keys[i] = key_strs[i];
+  }
+
+  constexpr char key[] = "key";
+  {
+    // Fake a corrupt blob index.
+    const std::string blob_index("foobar");
+    WriteBatch batch;
+    ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
+    ASSERT_OK(db_->Write(WriteOptions(), &batch));
+    keys[kNumOfKeys] = Slice(static_cast<const char*>(key), sizeof(key) - 1);
+  }
+
+  ASSERT_OK(Flush());
+
+  std::array<PinnableSlice, kNumOfKeys + 1> values;
+  std::array<Status, kNumOfKeys + 1> statuses;
+  db_->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), kNumOfKeys + 1,
+                keys.data(), values.data(), statuses.data(),
+                /*sorted_input=*/false);
+  for (size_t i = 0; i < kNumOfKeys + 1; ++i) {
+    if (i != kNumOfKeys) {
+      ASSERT_OK(statuses[i]);
+      ASSERT_EQ("blob_value" + std::to_string(i), values[i]);
+    } else {
+      ASSERT_TRUE(statuses[i].IsCorruption());
+    }
+  }
+}
+
+TEST_F(DBBlobBasicTest, MultiGetBlob_ExceedSoftLimit) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr size_t kNumOfKeys = 3;
+  std::array<std::string, kNumOfKeys> key_bufs;
+  std::array<std::string, kNumOfKeys> value_bufs;
+  std::array<Slice, kNumOfKeys> keys;
+  for (size_t i = 0; i < kNumOfKeys; ++i) {
+    key_bufs[i] = "foo" + std::to_string(i);
+    value_bufs[i] = "blob_value" + std::to_string(i);
+    ASSERT_OK(Put(key_bufs[i], value_bufs[i]));
+    keys[i] = key_bufs[i];
+  }
+  ASSERT_OK(Flush());
+
+  std::array<PinnableSlice, kNumOfKeys> values;
+  std::array<Status, kNumOfKeys> statuses;
+  ReadOptions read_opts;
+  read_opts.value_size_soft_limit = 1;
+  db_->MultiGet(read_opts, dbfull()->DefaultColumnFamily(), kNumOfKeys,
+                keys.data(), values.data(), statuses.data(),
+                /*sorted_input=*/true);
+  for (const auto& s : statuses) {
+    ASSERT_TRUE(s.IsAborted());
+  }
+}
+
+TEST_F(DBBlobBasicTest, GetBlob_InlinedTTLIndex) {
+  constexpr uint64_t min_blob_size = 10;
+
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = min_blob_size;
+
+  Reopen(options);
+
+  constexpr char key[] = "key";
+  constexpr char blob[] = "short";
+  static_assert(sizeof(blob) - 1 < min_blob_size,
+                "Blob too long to be inlined");
+
+  // Fake an inlined TTL blob index.
+  std::string blob_index;
+
+  constexpr uint64_t expiration = 1234567890;
+
+  BlobIndex::EncodeInlinedTTL(&blob_index, expiration, blob);
+
+  WriteBatch batch;
+  ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
+  ASSERT_OK(db_->Write(WriteOptions(), &batch));
+
+  ASSERT_OK(Flush());
+
+  PinnableSlice result;
+  ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
+                  .IsCorruption());
+}
+
+TEST_F(DBBlobBasicTest, GetBlob_IndexWithInvalidFileNumber) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr char key[] = "key";
+
+  // Fake a blob index referencing a non-existent blob file.
+  std::string blob_index;
+
+  constexpr uint64_t blob_file_number = 1000;
+  constexpr uint64_t offset = 1234;
+  constexpr uint64_t size = 5678;
+
+  BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size,
+                        kNoCompression);
+
+  WriteBatch batch;
+  ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
+  ASSERT_OK(db_->Write(WriteOptions(), &batch));
+
+  ASSERT_OK(Flush());
+
+  PinnableSlice result;
+  ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
+                  .IsCorruption());
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBBlobBasicTest, GenerateIOTracing) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  std::string trace_file = dbname_ + "/io_trace_file";
+
+  Reopen(options);
+  {
+    // Create an IO trace file.
+    std::unique_ptr<TraceWriter> trace_writer;
+    ASSERT_OK(
+        NewFileTraceWriter(env_, EnvOptions(), trace_file, &trace_writer));
+    ASSERT_OK(db_->StartIOTrace(TraceOptions(), std::move(trace_writer)));
+
+    constexpr char key[] = "key";
+    constexpr char blob_value[] = "blob_value";
+
+    ASSERT_OK(Put(key, blob_value));
+    ASSERT_OK(Flush());
+    ASSERT_EQ(Get(key), blob_value);
+
+    ASSERT_OK(db_->EndIOTrace());
+    ASSERT_OK(env_->FileExists(trace_file));
+  }
+  {
+    // Parse the trace file to check that file operations related to blob
+    // files are recorded.
+    std::unique_ptr<TraceReader> trace_reader;
+    ASSERT_OK(
+        NewFileTraceReader(env_, EnvOptions(), trace_file, &trace_reader));
+    IOTraceReader reader(std::move(trace_reader));
+
+    IOTraceHeader header;
+    ASSERT_OK(reader.ReadHeader(&header));
+    ASSERT_EQ(kMajorVersion, static_cast<int>(header.rocksdb_major_version));
+    ASSERT_EQ(kMinorVersion, static_cast<int>(header.rocksdb_minor_version));
+
+    // Read records.
+    int blob_files_op_count = 0;
+    Status status;
+    while (true) {
+      IOTraceRecord record;
+      status = reader.ReadIOOp(&record);
+      if (!status.ok()) {
+        break;
+      }
+      if (record.file_name.find("blob") != std::string::npos) {
+        blob_files_op_count++;
+      }
+    }
+    // Assuming blob files will have Append, Close and then Read operations.
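+    // (One Put/Flush/Get cycle should produce at least one Append for the
+    // blob write, one Close, and one Read for the Get -- at least three
+    // traced blob-file operations in total, hence the strict inequality.)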
+    ASSERT_GT(blob_files_op_count, 2);
+  }
+}
+#endif  // !ROCKSDB_LITE
+
+TEST_F(DBBlobBasicTest, BestEffortsRecovery_MissingNewestBlobFile) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.create_if_missing = true;
+  Reopen(options);
+
+  ASSERT_OK(dbfull()->DisableFileDeletions());
+  constexpr int kNumTableFiles = 2;
+  for (int i = 0; i < kNumTableFiles; ++i) {
+    for (char ch = 'a'; ch != 'c'; ++ch) {
+      std::string key(1, ch);
+      ASSERT_OK(Put(key, "value" + std::to_string(i)));
+    }
+    ASSERT_OK(Flush());
+  }
+
+  Close();
+
+  std::vector<std::string> files;
+  ASSERT_OK(env_->GetChildren(dbname_, &files));
+  std::string blob_file_path;
+  uint64_t max_blob_file_num = kInvalidBlobFileNumber;
+  for (const auto& fname : files) {
+    uint64_t file_num = 0;
+    FileType type;
+    if (ParseFileName(fname, &file_num, /*info_log_name_prefix=*/"", &type) &&
+        type == kBlobFile) {
+      if (file_num > max_blob_file_num) {
+        max_blob_file_num = file_num;
+        blob_file_path = dbname_ + "/" + fname;
+      }
+    }
+  }
+  ASSERT_OK(env_->DeleteFile(blob_file_path));
+
+  options.best_efforts_recovery = true;
+  Reopen(options);
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "a", &value));
+  ASSERT_EQ("value" + std::to_string(kNumTableFiles - 2), value);
+}
+
+TEST_F(DBBlobBasicTest, GetMergeBlobWithPut) {
+  Options options = GetDefaultOptions();
+  options.merge_operator = MergeOperators::CreateStringAppendOperator();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  ASSERT_OK(Put("Key1", "v1"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Merge("Key1", "v2"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Merge("Key1", "v3"));
+  ASSERT_OK(Flush());
+
+  std::string value;
+  ASSERT_OK(db_->Get(ReadOptions(), "Key1", &value));
+  ASSERT_EQ(Get("Key1"), "v1,v2,v3");
+}
+
+TEST_F(DBBlobBasicTest, MultiGetMergeBlobWithPut) {
+  constexpr size_t num_keys = 3;
+
+  Options options = GetDefaultOptions();
+  options.merge_operator = MergeOperators::CreateStringAppendOperator();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  ASSERT_OK(Put("Key0", "v0_0"));
+  ASSERT_OK(Put("Key1", "v1_0"));
+  ASSERT_OK(Put("Key2", "v2_0"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Merge("Key0", "v0_1"));
+  ASSERT_OK(Merge("Key1", "v1_1"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Merge("Key0", "v0_2"));
+  ASSERT_OK(Flush());
+
+  std::array<Slice, num_keys> keys{{"Key0", "Key1", "Key2"}};
+  std::array<PinnableSlice, num_keys> values;
+  std::array<Status, num_keys> statuses;
+
+  db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys, &keys[0],
+                &values[0], &statuses[0]);
+
+  ASSERT_OK(statuses[0]);
+  ASSERT_EQ(values[0], "v0_0,v0_1,v0_2");
+
+  ASSERT_OK(statuses[1]);
+  ASSERT_EQ(values[1], "v1_0,v1_1");
+
+  ASSERT_OK(statuses[2]);
+  ASSERT_EQ(values[2], "v2_0");
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBBlobBasicTest, Properties) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr char key1[] = "key1";
+  constexpr size_t key1_size = sizeof(key1) - 1;
+
+  constexpr char key2[] = "key2";
+  constexpr size_t key2_size = sizeof(key2) - 1;
+
+  constexpr char key3[] = "key3";
+  constexpr size_t key3_size = sizeof(key3) - 1;
+
+  constexpr char blob[] = "0000000000";
+  constexpr size_t blob_size = sizeof(blob) - 1;
+
+  ASSERT_OK(Put(key1, blob));
+  ASSERT_OK(Put(key2, blob));
+  ASSERT_OK(Flush());
+
+  constexpr size_t first_blob_file_expected_size =
+      BlobLogHeader::kSize +
+      BlobLogRecord::CalculateAdjustmentForRecordHeader(key1_size) + blob_size +
+      BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) + blob_size +
+      BlobLogFooter::kSize;
+
+  ASSERT_OK(Put(key3, blob));
+  ASSERT_OK(Flush());
+
+  constexpr size_t second_blob_file_expected_size =
+      BlobLogHeader::kSize +
+      BlobLogRecord::CalculateAdjustmentForRecordHeader(key3_size) + blob_size +
+      BlobLogFooter::kSize;
+
+  constexpr size_t total_expected_size =
+      first_blob_file_expected_size + second_blob_file_expected_size;
+
+  // Number of blob files
+  uint64_t num_blob_files = 0;
+  ASSERT_TRUE(
+      db_->GetIntProperty(DB::Properties::kNumBlobFiles, &num_blob_files));
+  ASSERT_EQ(num_blob_files, 2);
+
+  // Total size of live blob files
+  uint64_t live_blob_file_size = 0;
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileSize,
+                                  &live_blob_file_size));
+  ASSERT_EQ(live_blob_file_size, total_expected_size);
+
+  // Total size of all blob files across all versions
+  // Note: this should be the same as above since we only have one
+  // version at this point.
+  uint64_t total_blob_file_size = 0;
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
+                                  &total_blob_file_size));
+  ASSERT_EQ(total_blob_file_size, total_expected_size);
+
+  // Delete key2 to create some garbage
+  ASSERT_OK(Delete(key2));
+  ASSERT_OK(Flush());
+
+  constexpr Slice* begin = nullptr;
+  constexpr Slice* end = nullptr;
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
+
+  constexpr size_t expected_garbage_size =
+      BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) + blob_size;
+
+  // Blob file stats
+  std::string blob_stats;
+  ASSERT_TRUE(db_->GetProperty(DB::Properties::kBlobStats, &blob_stats));
+
+  std::ostringstream oss;
+  oss << "Number of blob files: 2\nTotal size of blob files: "
+      << total_expected_size
+      << "\nTotal size of garbage in blob files: " << expected_garbage_size
+      << '\n';
+
+  ASSERT_EQ(blob_stats, oss.str());
+}
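+
+// For reference, applications read these same counters through the property
+// API exercised above; a minimal sketch (a `db` pointer is assumed):
+//
+//   uint64_t num_blob_files = 0;
+//   db->GetIntProperty(DB::Properties::kNumBlobFiles, &num_blob_files);
+//   std::string blob_stats;
+//   db->GetProperty(DB::Properties::kBlobStats, &blob_stats);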
+
+TEST_F(DBBlobBasicTest, PropertiesMultiVersion) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr char key1[] = "key1";
+  constexpr char key2[] = "key2";
+  constexpr char key3[] = "key3";
+
+  constexpr size_t key_size = sizeof(key1) - 1;
+  static_assert(sizeof(key2) - 1 == key_size, "unexpected size: key2");
+  static_assert(sizeof(key3) - 1 == key_size, "unexpected size: key3");
+
+  constexpr char blob[] = "0000000000";
+  constexpr size_t blob_size = sizeof(blob) - 1;
+
+  ASSERT_OK(Put(key1, blob));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(Put(key2, blob));
+  ASSERT_OK(Flush());
+
+  // Create an iterator to keep the current version alive
+  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
+  ASSERT_OK(iter->status());
+
+  // Note: the Delete and subsequent compaction result in the first blob file
+  // not making it to the final version. (It is still part of the previous
+  // version kept alive by the iterator though.) On the other hand, the Put
+  // results in a third blob file.
+  ASSERT_OK(Delete(key1));
+  ASSERT_OK(Put(key3, blob));
+  ASSERT_OK(Flush());
+
+  constexpr Slice* begin = nullptr;
+  constexpr Slice* end = nullptr;
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
+
+  // Total size of all blob files across all versions: between the two
+  // versions, we should have three blob files of the same size with one blob
+  // each. The version kept alive by the iterator contains the first and the
+  // second blob file, while the final version contains the second and the
+  // third blob file. (The second blob file is thus shared by the two versions
+  // but should be counted only once.)
+  uint64_t total_blob_file_size = 0;
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
+                                  &total_blob_file_size));
+  ASSERT_EQ(total_blob_file_size,
+            3 * (BlobLogHeader::kSize +
+                 BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) +
+                 blob_size + BlobLogFooter::kSize));
+}
+#endif  // !ROCKSDB_LITE
+
+class DBBlobBasicIOErrorTest : public DBBlobBasicTest,
+                               public testing::WithParamInterface<std::string> {
+ protected:
+  DBBlobBasicIOErrorTest() : sync_point_(GetParam()) {
+    fault_injection_env_.reset(new FaultInjectionTestEnv(env_));
+  }
+  ~DBBlobBasicIOErrorTest() { Close(); }
+
+  std::unique_ptr<FaultInjectionTestEnv> fault_injection_env_;
+  std::string sync_point_;
+};
+
+class DBBlobBasicIOErrorMultiGetTest : public DBBlobBasicIOErrorTest {
+ public:
+  DBBlobBasicIOErrorMultiGetTest() : DBBlobBasicIOErrorTest() {}
+};
+
+INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorTest,
+                        ::testing::ValuesIn(std::vector<std::string>{
+                            "BlobFileReader::OpenFile:NewRandomAccessFile",
+                            "BlobFileReader::GetBlob:ReadFromFile"}));
+
+INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorMultiGetTest,
+                        ::testing::ValuesIn(std::vector<std::string>{
+                            "BlobFileReader::OpenFile:NewRandomAccessFile",
+                            "BlobFileReader::MultiGetBlob:ReadFromFile"}));
+
+TEST_P(DBBlobBasicIOErrorTest, GetBlob_IOError) {
+  Options options;
+  options.env = fault_injection_env_.get();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr char key[] = "key";
+  constexpr char blob_value[] = "blob_value";
+
+  ASSERT_OK(Put(key, blob_value));
+
+  ASSERT_OK(Flush());
+
+  SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
+    fault_injection_env_->SetFilesystemActive(false,
+                                              Status::IOError(sync_point_));
+  });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  PinnableSlice result;
+  ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
+                  .IsIOError());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
+TEST_P(DBBlobBasicIOErrorMultiGetTest, MultiGetBlobs_IOError) {
+  Options options = GetDefaultOptions();
+  options.env = fault_injection_env_.get();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr size_t num_keys = 2;
+
+  constexpr char first_key[] = "first_key";
+  constexpr char first_value[] = "first_value";
+
+  ASSERT_OK(Put(first_key, first_value));
+
+  constexpr char second_key[] = "second_key";
+  constexpr char second_value[] = "second_value";
+
+  ASSERT_OK(Put(second_key, second_value));
+
+  ASSERT_OK(Flush());
+
+  std::array<Slice, num_keys> keys{{first_key, second_key}};
+  std::array<PinnableSlice, num_keys> values;
+  std::array<Status, num_keys> statuses;
+
+  SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
+    fault_injection_env_->SetFilesystemActive(false,
+                                              Status::IOError(sync_point_));
+  });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys, &keys[0],
+                &values[0], &statuses[0]);
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  ASSERT_TRUE(statuses[0].IsIOError());
+  ASSERT_TRUE(statuses[1].IsIOError());
+}
+
+TEST_P(DBBlobBasicIOErrorMultiGetTest, MultipleBlobFiles) {
+  Options options = GetDefaultOptions();
+  options.env = fault_injection_env_.get();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+
+  Reopen(options);
+
+  constexpr size_t num_keys = 2;
+
+  constexpr char key1[] = "key1";
+  constexpr char value1[] = "blob1";
+
+  ASSERT_OK(Put(key1, value1));
+  ASSERT_OK(Flush());
+
+  constexpr char key2[] = "key2";
+  constexpr char value2[] = "blob2";
+
+  ASSERT_OK(Put(key2, value2));
+  ASSERT_OK(Flush());
+
+  std::array<Slice, num_keys> keys{{key1, key2}};
+  std::array<PinnableSlice, num_keys> values;
+  std::array<Status, num_keys> statuses;
+
+  bool first_blob_file = true;
+  SyncPoint::GetInstance()->SetCallBack(
+      sync_point_, [&first_blob_file, this](void* /* arg */) {
+        if (first_blob_file) {
+          first_blob_file = false;
+          return;
+        }
+        fault_injection_env_->SetFilesystemActive(false,
+                                                  Status::IOError(sync_point_));
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
+                keys.data(), values.data(), statuses.data());
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+  ASSERT_OK(statuses[0]);
+  ASSERT_EQ(value1, values[0]);
+  ASSERT_TRUE(statuses[1].IsIOError());
+}
+
+namespace {
+
+class ReadBlobCompactionFilter : public CompactionFilter {
+ public:
+  ReadBlobCompactionFilter() = default;
+  const char* Name() const override {
+    return "rocksdb.compaction.filter.read.blob";
+  }
+  CompactionFilter::Decision FilterV2(
+      int /*level*/, const Slice& /*key*/, ValueType value_type,
+      const Slice& existing_value, std::string* new_value,
+      std::string* /*skip_until*/) const override {
+    if (value_type != CompactionFilter::ValueType::kValue) {
+      return CompactionFilter::Decision::kKeep;
+    }
+    assert(new_value);
+    new_value->assign(existing_value.data(), existing_value.size());
+    return CompactionFilter::Decision::kChangeValue;
+  }
+};
+
+}  // anonymous namespace
+
+TEST_P(DBBlobBasicIOErrorTest, CompactionFilterReadBlob_IOError) {
+  Options options = GetDefaultOptions();
+  options.env = fault_injection_env_.get();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.create_if_missing = true;
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new ReadBlobCompactionFilter);
+  options.compaction_filter = compaction_filter_guard.get();
+
+  DestroyAndReopen(options);
+  constexpr char key[] = "foo";
+  constexpr char blob_value[] = "foo_blob_value";
+  ASSERT_OK(Put(key, blob_value));
+  ASSERT_OK(Flush());
+
+  SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
+    fault_injection_env_->SetFilesystemActive(false,
+                                              Status::IOError(sync_point_));
+  });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                                /*end=*/nullptr)
+                  .IsIOError());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  RegisterCustomObjects(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc	1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc	2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,718 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/blob_index.h"
+#include "db/blob/blob_log_format.h"
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "test_util/sync_point.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DBBlobCompactionTest : public DBTestBase {
+ public:
+  explicit DBBlobCompactionTest()
+      : DBTestBase("db_blob_compaction_test", /*env_do_fsync=*/false) {}
+
+#ifndef ROCKSDB_LITE
+  const std::vector<InternalStats::CompactionStats>& GetCompactionStats() {
+    VersionSet* const versions = dbfull()->GetVersionSet();
+    assert(versions);
+    assert(versions->GetColumnFamilySet());
+
+    ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
+    assert(cfd);
+
+    const InternalStats* const internal_stats = cfd->internal_stats();
+    assert(internal_stats);
+
+    return internal_stats->TEST_GetCompactionStats();
+  }
+#endif  // ROCKSDB_LITE
+};
+
+namespace {
+
+class FilterByKeyLength : public CompactionFilter {
+ public:
+  explicit FilterByKeyLength(size_t len) : length_threshold_(len) {}
+  const char* Name() const override {
+    return "rocksdb.compaction.filter.by.key.length";
+  }
+  CompactionFilter::Decision FilterBlobByKey(
+      int /*level*/, const Slice& key, std::string* /*new_value*/,
+      std::string* /*skip_until*/) const override {
+    if (key.size() < length_threshold_) {
+      return CompactionFilter::Decision::kRemove;
+    }
+    return CompactionFilter::Decision::kKeep;
+  }
+
+ private:
+  size_t length_threshold_;
+};
+
+class BadBlobCompactionFilter : public CompactionFilter {
+ public:
+  explicit BadBlobCompactionFilter(std::string prefix,
+                                   CompactionFilter::Decision filter_by_key,
+                                   CompactionFilter::Decision filter_v2)
+      : prefix_(std::move(prefix)),
+        filter_blob_by_key_(filter_by_key),
+        filter_v2_(filter_v2) {}
+  const char* Name() const override { return "rocksdb.compaction.filter.bad"; }
+  CompactionFilter::Decision FilterBlobByKey(
+      int /*level*/, const Slice& key, std::string* /*new_value*/,
+      std::string* /*skip_until*/) const override {
+    if (key.size() >= prefix_.size() &&
+        0 == strncmp(prefix_.data(), key.data(), prefix_.size())) {
+      return CompactionFilter::Decision::kUndetermined;
+    }
+    return filter_blob_by_key_;
+  }
+  CompactionFilter::Decision FilterV2(
+      int /*level*/, const Slice& /*key*/, ValueType /*value_type*/,
+      const Slice& /*existing_value*/, std::string* /*new_value*/,
+      std::string* /*skip_until*/) const override {
+    return filter_v2_;
+  }
+
+ private:
+  const std::string prefix_;
+  const CompactionFilter::Decision filter_blob_by_key_;
+  const CompactionFilter::Decision filter_v2_;
+};
+
+class ValueBlindWriteFilter : public CompactionFilter {
+ public:
+  explicit ValueBlindWriteFilter(std::string new_val)
+      : new_value_(std::move(new_val)) {}
+  const char* Name() const override {
+    return "rocksdb.compaction.filter.blind.write";
+  }
+  CompactionFilter::Decision FilterBlobByKey(
+      int level, const Slice& key, std::string* new_value,
+      std::string* skip_until) const override;
+
+ private:
+  const std::string new_value_;
+};
+
+CompactionFilter::Decision ValueBlindWriteFilter::FilterBlobByKey(
+    int /*level*/, const Slice& /*key*/, std::string* new_value,
+    std::string* /*skip_until*/) const {
+  assert(new_value);
+  new_value->assign(new_value_);
+  return CompactionFilter::Decision::kChangeValue;
+}
+
+class ValueMutationFilter : public CompactionFilter {
+ public:
+  explicit ValueMutationFilter(std::string padding)
+      : padding_(std::move(padding)) {}
+  const char* Name() const override {
+    return "rocksdb.compaction.filter.value.mutation";
+  }
+  CompactionFilter::Decision FilterV2(int level, const Slice& key,
+                                      ValueType value_type,
+                                      const Slice& existing_value,
+                                      std::string* new_value,
+                                      std::string* skip_until) const override;
+
+ private:
+  const std::string padding_;
+};
+
+CompactionFilter::Decision ValueMutationFilter::FilterV2(
+    int /*level*/, const Slice& /*key*/, ValueType value_type,
+    const Slice& existing_value, std::string* new_value,
+    std::string* /*skip_until*/) const {
+  assert(CompactionFilter::ValueType::kBlobIndex != value_type);
+  if (CompactionFilter::ValueType::kValue != value_type) {
+    return CompactionFilter::Decision::kKeep;
+  }
+  assert(new_value);
+  new_value->assign(existing_value.data(), existing_value.size());
+  new_value->append(padding_);
+  return CompactionFilter::Decision::kChangeValue;
+}
+
+class AlwaysKeepFilter : public CompactionFilter {
+ public:
+  explicit AlwaysKeepFilter() = default;
+  const char* Name() const override {
+    return "rocksdb.compaction.filter.always.keep";
+  }
+  CompactionFilter::Decision FilterV2(
+      int /*level*/, const Slice& /*key*/, ValueType /*value_type*/,
+      const Slice& /*existing_value*/, std::string* /*new_value*/,
+      std::string* /*skip_until*/) const override {
+    return CompactionFilter::Decision::kKeep;
+  }
+};
+
+class SkipUntilFilter : public CompactionFilter {
+ public:
+  explicit SkipUntilFilter(std::string skip_until)
+      : skip_until_(std::move(skip_until)) {}
+
+  const char* Name() const override {
+    return "rocksdb.compaction.filter.skip.until";
+  }
+
+  CompactionFilter::Decision FilterV2(int /* level */, const Slice& /* key */,
+                                      ValueType /* value_type */,
+                                      const Slice& /* existing_value */,
+                                      std::string* /* new_value */,
+                                      std::string* skip_until) const override {
+    assert(skip_until);
+    *skip_until = skip_until_;
+
+    return CompactionFilter::Decision::kRemoveAndSkipUntil;
+  }
+
+ private:
+  std::string skip_until_;
+};
+
+}  // anonymous namespace
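+
+// Note the split between the two entry points exercised by the filters above:
+// a FilterBlobByKey() decision is based on the key alone, so compaction never
+// reads the blob, while a FilterV2() decision sees the existing value and
+// therefore forces a blob read first. Returning kUndetermined from
+// FilterBlobByKey() defers the decision to FilterV2(); the per-test
+// bytes_read_blob/bytes_written_blob assertions below pin down which path
+// each filter takes.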
+
+class DBBlobBadCompactionFilterTest
+    : public DBBlobCompactionTest,
+      public testing::WithParamInterface<
+          std::tuple<std::string, CompactionFilter::Decision,
+                     CompactionFilter::Decision>> {
+ public:
+  explicit DBBlobBadCompactionFilterTest()
+      : compaction_filter_guard_(new BadBlobCompactionFilter(
+            std::get<0>(GetParam()), std::get<1>(GetParam()),
+            std::get<2>(GetParam()))) {}
+
+ protected:
+  std::unique_ptr<CompactionFilter> compaction_filter_guard_;
+};
+
+INSTANTIATE_TEST_CASE_P(
+    BadCompactionFilter, DBBlobBadCompactionFilterTest,
+    testing::Combine(
+        testing::Values("a"),
+        testing::Values(CompactionFilter::Decision::kChangeBlobIndex,
+                        CompactionFilter::Decision::kIOError),
+        testing::Values(CompactionFilter::Decision::kUndetermined,
+                        CompactionFilter::Decision::kChangeBlobIndex,
+                        CompactionFilter::Decision::kIOError)));
+
+TEST_F(DBBlobCompactionTest, FilterByKeyLength) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.create_if_missing = true;
+  constexpr size_t kKeyLength = 2;
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new FilterByKeyLength(kKeyLength));
+  options.compaction_filter = compaction_filter_guard.get();
+
+  constexpr char short_key[] = "a";
+  constexpr char long_key[] = "abc";
+  constexpr char blob_value[] = "value";
+
+  DestroyAndReopen(options);
+  ASSERT_OK(Put(short_key, blob_value));
+  ASSERT_OK(Put(long_key, blob_value));
+  ASSERT_OK(Flush());
+  CompactRangeOptions cro;
+  ASSERT_OK(db_->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr));
+  std::string value;
+  ASSERT_TRUE(db_->Get(ReadOptions(), short_key, &value).IsNotFound());
+  value.clear();
+  ASSERT_OK(db_->Get(ReadOptions(), long_key, &value));
+  ASSERT_EQ("value", value);
+
+#ifndef ROCKSDB_LITE
+  const auto& compaction_stats = GetCompactionStats();
+  ASSERT_GE(compaction_stats.size(), 2);
+
+  // Filter decides between kKeep and kRemove solely based on the key;
+  // this involves neither reading nor writing blobs
+  ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
+  ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0);
+#endif  // ROCKSDB_LITE
+
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, BlindWriteFilter) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.create_if_missing = true;
+  constexpr char new_blob_value[] = "new_blob_value";
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new ValueBlindWriteFilter(new_blob_value));
+  options.compaction_filter = compaction_filter_guard.get();
+  DestroyAndReopen(options);
+  const std::vector<std::string> keys = {"a", "b", "c"};
+  const std::vector<std::string> values = {"a_value", "b_value", "c_value"};
+  assert(keys.size() == values.size());
+  for (size_t i = 0; i < keys.size(); ++i) {
+    ASSERT_OK(Put(keys[i], values[i]));
+  }
+  ASSERT_OK(Flush());
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                              /*end=*/nullptr));
+  for (const auto& key : keys) {
+    ASSERT_EQ(new_blob_value, Get(key));
+  }
+
+#ifndef ROCKSDB_LITE
+  const auto& compaction_stats = GetCompactionStats();
+  ASSERT_GE(compaction_stats.size(), 2);
+
+  // Filter unconditionally changes the value in FilterBlobByKey;
+  // this involves writing but not reading blobs
+  ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0);
+  ASSERT_GT(compaction_stats[1].bytes_written_blob, 0);
+#endif  // ROCKSDB_LITE
+
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, SkipUntilFilter) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new SkipUntilFilter("z"));
+  options.compaction_filter = compaction_filter_guard.get();
+
+  Reopen(options);
+
+  const std::vector<std::string> keys{"a", "b", "c"};
+  const std::vector<std::string> values{"a_value", "b_value", "c_value"};
+  assert(keys.size() == values.size());
+
+  for (size_t i = 0; i < keys.size(); ++i) {
+    ASSERT_OK(Put(keys[i], values[i]));
+  }
+
+  ASSERT_OK(Flush());
+
+  int process_in_flow_called = 0;
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "BlobCountingIterator::UpdateAndCountBlobIfNeeded:ProcessInFlow",
+      [&process_in_flow_called](void* /* arg */) { ++process_in_flow_called; });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /* begin */ nullptr,
+                              /* end */ nullptr));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  for (const auto& key : keys) {
+    ASSERT_EQ(Get(key), "NOT_FOUND");
+  }
+
+  // Make sure SkipUntil was performed using iteration rather than Seek
+  ASSERT_EQ(process_in_flow_called, keys.size());
+
+  Close();
+}
+
+TEST_P(DBBlobBadCompactionFilterTest, BadDecisionFromCompactionFilter) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.create_if_missing = true;
+  options.compaction_filter = compaction_filter_guard_.get();
+  DestroyAndReopen(options);
+  ASSERT_OK(Put("b", "value"));
+  ASSERT_OK(Flush());
+  ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                                /*end=*/nullptr)
+                  .IsNotSupported());
+  Close();
+
+  DestroyAndReopen(options);
+  std::string key(std::get<0>(GetParam()));
+  ASSERT_OK(Put(key, "value"));
+  ASSERT_OK(Flush());
+  ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                                /*end=*/nullptr)
+                  .IsNotSupported());
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, CompactionFilter_InlinedTTLIndex) {
+  Options options = GetDefaultOptions();
+  options.create_if_missing = true;
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new ValueMutationFilter(""));
+  options.compaction_filter = compaction_filter_guard.get();
+  DestroyAndReopen(options);
+  constexpr char key[] = "key";
+  constexpr char blob[] = "blob";
+  // Fake an inlined TTL blob index.
+  std::string blob_index;
+  constexpr uint64_t expiration = 1234567890;
+  BlobIndex::EncodeInlinedTTL(&blob_index, expiration, blob);
+  WriteBatch batch;
+  ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
+  ASSERT_OK(db_->Write(WriteOptions(), &batch));
+  ASSERT_OK(Flush());
+  ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                                /*end=*/nullptr)
+                  .IsCorruption());
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, CompactionFilter) {
+  Options options = GetDefaultOptions();
+  options.create_if_missing = true;
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  constexpr char padding[] = "_delta";
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new ValueMutationFilter(padding));
+  options.compaction_filter = compaction_filter_guard.get();
+  DestroyAndReopen(options);
+  const std::vector<std::pair<std::string, std::string>> kvs = {
+      {"a", "a_value"}, {"b", "b_value"}, {"c", "c_value"}};
+  for (const auto& kv : kvs) {
+    ASSERT_OK(Put(kv.first, kv.second));
+  }
+  ASSERT_OK(Flush());
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                              /*end=*/nullptr));
+  for (const auto& kv : kvs) {
+    ASSERT_EQ(kv.second + std::string(padding), Get(kv.first));
+  }
+
+#ifndef ROCKSDB_LITE
+  const auto& compaction_stats = GetCompactionStats();
+  ASSERT_GE(compaction_stats.size(), 2);
+
+  // Filter changes the value using the previous value in FilterV2;
+  // this involves reading and writing blobs
+  ASSERT_GT(compaction_stats[1].bytes_read_blob, 0);
+  ASSERT_GT(compaction_stats[1].bytes_written_blob, 0);
+#endif  // ROCKSDB_LITE
+
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, CorruptedBlobIndex) {
+  Options options = GetDefaultOptions();
+  options.create_if_missing = true;
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new ValueMutationFilter(""));
+  options.compaction_filter = compaction_filter_guard.get();
+  DestroyAndReopen(options);
+  // Mock a corrupted blob index
+  constexpr char key[] = "key";
+  std::string blob_idx("blob_idx");
+  WriteBatch write_batch;
+  ASSERT_OK(WriteBatchInternal::PutBlobIndex(&write_batch, 0, key, blob_idx));
+  ASSERT_OK(db_->Write(WriteOptions(), &write_batch));
+  ASSERT_OK(Flush());
+  ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                                /*end=*/nullptr)
+                  .IsCorruption());
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, CompactionFilterReadBlobAndKeep) {
+  Options options = GetDefaultOptions();
+  options.create_if_missing = true;
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new AlwaysKeepFilter());
+  options.compaction_filter = compaction_filter_guard.get();
+  DestroyAndReopen(options);
ASSERT_OK(Put("foo", "foo_value")); + ASSERT_OK(Flush()); + std::vector blob_files = GetBlobFileNumbers(); + ASSERT_EQ(1, blob_files.size()); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr, + /*end=*/nullptr)); + ASSERT_EQ(blob_files, GetBlobFileNumbers()); + +#ifndef ROCKSDB_LITE + const auto& compaction_stats = GetCompactionStats(); + ASSERT_GE(compaction_stats.size(), 2); + + // Filter decides to keep the existing value in FilterV2; + // this involves reading but not writing blobs + ASSERT_GT(compaction_stats[1].bytes_read_blob, 0); + ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); +#endif // ROCKSDB_LITE + + Close(); +} + +TEST_F(DBBlobCompactionTest, TrackGarbage) { + Options options = GetDefaultOptions(); + options.enable_blob_files = true; + + Reopen(options); + + // First table+blob file pair: 4 blobs with different keys + constexpr char first_key[] = "first_key"; + constexpr char first_value[] = "first_value"; + constexpr char second_key[] = "second_key"; + constexpr char second_value[] = "second_value"; + constexpr char third_key[] = "third_key"; + constexpr char third_value[] = "third_value"; + constexpr char fourth_key[] = "fourth_key"; + constexpr char fourth_value[] = "fourth_value"; + + ASSERT_OK(Put(first_key, first_value)); + ASSERT_OK(Put(second_key, second_value)); + ASSERT_OK(Put(third_key, third_value)); + ASSERT_OK(Put(fourth_key, fourth_value)); + ASSERT_OK(Flush()); + + // Second table+blob file pair: overwrite 2 existing keys + constexpr char new_first_value[] = "new_first_value"; + constexpr char new_second_value[] = "new_second_value"; + + ASSERT_OK(Put(first_key, new_first_value)); + ASSERT_OK(Put(second_key, new_second_value)); + ASSERT_OK(Flush()); + + // Compact them together. The first blob file should have 2 garbage blobs + // corresponding to the 2 overwritten keys. 
+  constexpr Slice* begin = nullptr;
+  constexpr Slice* end = nullptr;
+
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
+
+  VersionSet* const versions = dbfull()->GetVersionSet();
+  assert(versions);
+  assert(versions->GetColumnFamilySet());
+
+  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
+  assert(cfd);
+
+  Version* const current = cfd->current();
+  assert(current);
+
+  const VersionStorageInfo* const storage_info = current->storage_info();
+  assert(storage_info);
+
+  const auto& blob_files = storage_info->GetBlobFiles();
+  ASSERT_EQ(blob_files.size(), 2);
+
+  {
+    auto it = blob_files.begin();
+    const auto& meta = it->second;
+    assert(meta);
+
+    constexpr uint64_t first_expected_bytes =
+        sizeof(first_value) - 1 +
+        BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(first_key) -
+                                                          1);
+    constexpr uint64_t second_expected_bytes =
+        sizeof(second_value) - 1 +
+        BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(second_key) -
+                                                          1);
+    constexpr uint64_t third_expected_bytes =
+        sizeof(third_value) - 1 +
+        BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(third_key) -
+                                                          1);
+    constexpr uint64_t fourth_expected_bytes =
+        sizeof(fourth_value) - 1 +
+        BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(fourth_key) -
+                                                          1);
+
+    ASSERT_EQ(meta->GetTotalBlobCount(), 4);
+    ASSERT_EQ(meta->GetTotalBlobBytes(),
+              first_expected_bytes + second_expected_bytes +
+                  third_expected_bytes + fourth_expected_bytes);
+    ASSERT_EQ(meta->GetGarbageBlobCount(), 2);
+    ASSERT_EQ(meta->GetGarbageBlobBytes(),
+              first_expected_bytes + second_expected_bytes);
+  }
+
+  {
+    auto it = blob_files.rbegin();
+    const auto& meta = it->second;
+    assert(meta);
+
+    constexpr uint64_t new_first_expected_bytes =
+        sizeof(new_first_value) - 1 +
+        BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(first_key) -
+                                                          1);
+    constexpr uint64_t new_second_expected_bytes =
+        sizeof(new_second_value) - 1 +
+        BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(second_key) -
+                                                          1);
+
+    ASSERT_EQ(meta->GetTotalBlobCount(), 2);
+    ASSERT_EQ(meta->GetTotalBlobBytes(),
+              new_first_expected_bytes + new_second_expected_bytes);
+    ASSERT_EQ(meta->GetGarbageBlobCount(), 0);
+    ASSERT_EQ(meta->GetGarbageBlobBytes(), 0);
+  }
+}
+
+TEST_F(DBBlobCompactionTest, MergeBlobWithBase) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.merge_operator = MergeOperators::CreateStringAppendOperator();
+  options.disable_auto_compactions = true;
+
+  Reopen(options);
+  ASSERT_OK(Put("Key1", "v1_1"));
+  ASSERT_OK(Put("Key2", "v2_1"));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(Merge("Key1", "v1_2"));
+  ASSERT_OK(Merge("Key2", "v2_2"));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(Merge("Key1", "v1_3"));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
+                              /*end=*/nullptr));
+  ASSERT_EQ(Get("Key1"), "v1_1,v1_2,v1_3");
+  ASSERT_EQ(Get("Key2"), "v2_1,v2_2");
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, CompactionReadaheadGarbageCollection) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.enable_blob_garbage_collection = true;
+  options.blob_garbage_collection_age_cutoff = 1.0;
+  options.blob_compaction_readahead_size = 1 << 10;
+  options.disable_auto_compactions = true;
+
+  Reopen(options);
+
+  ASSERT_OK(Put("key", "lime"));
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(Put("key", "pie"));
+  ASSERT_OK(Put("foo", "baz"));
+  ASSERT_OK(Flush());
+
+  size_t num_non_prefetch_reads = 0;
+  SyncPoint::GetInstance()->SetCallBack(
+      "BlobFileReader::GetBlob:ReadFromFile",
+      [&num_non_prefetch_reads](void* /* arg */) { ++num_non_prefetch_reads; });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  constexpr Slice* begin = nullptr;
+  constexpr Slice* end = nullptr;
+
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  ASSERT_EQ(Get("key"), "pie");
+  ASSERT_EQ(Get("foo"), "baz");
+  ASSERT_EQ(num_non_prefetch_reads, 0);
+
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, CompactionReadaheadFilter) {
+  Options options = GetDefaultOptions();
+
+  std::unique_ptr<CompactionFilter> compaction_filter_guard(
+      new ValueMutationFilter("pie"));
+
+  options.compaction_filter = compaction_filter_guard.get();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.blob_compaction_readahead_size = 1 << 10;
+  options.disable_auto_compactions = true;
+
+  Reopen(options);
+
+  ASSERT_OK(Put("key", "lime"));
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_OK(Flush());
+
+  size_t num_non_prefetch_reads = 0;
+  SyncPoint::GetInstance()->SetCallBack(
+      "BlobFileReader::GetBlob:ReadFromFile",
+      [&num_non_prefetch_reads](void* /* arg */) { ++num_non_prefetch_reads; });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  constexpr Slice* begin = nullptr;
+  constexpr Slice* end = nullptr;
+
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  ASSERT_EQ(Get("key"), "limepie");
+  ASSERT_EQ(Get("foo"), "barpie");
+  ASSERT_EQ(num_non_prefetch_reads, 0);
+
+  Close();
+}
+
+TEST_F(DBBlobCompactionTest, CompactionReadaheadMerge) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.blob_compaction_readahead_size = 1 << 10;
+  options.merge_operator = MergeOperators::CreateStringAppendOperator();
+  options.disable_auto_compactions = true;
+
+  Reopen(options);
+
+  ASSERT_OK(Put("key", "lime"));
+  ASSERT_OK(Put("foo", "bar"));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(Merge("key", "pie"));
+  ASSERT_OK(Merge("foo", "baz"));
+  ASSERT_OK(Flush());
+
+  size_t num_non_prefetch_reads = 0;
+  SyncPoint::GetInstance()->SetCallBack(
+      "BlobFileReader::GetBlob:ReadFromFile",
+      [&num_non_prefetch_reads](void* /* arg */) { ++num_non_prefetch_reads; });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  constexpr Slice* begin = nullptr;
+  constexpr Slice* end = nullptr;
+
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  ASSERT_EQ(Get("key"), "lime,pie");
+  ASSERT_EQ(Get("foo"), "bar,baz");
+  ASSERT_EQ(num_non_prefetch_reads, 0);
+
+  Close();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  RegisterCustomObjects(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc	1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc	2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,82 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "test_util/sync_point.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DBBlobCorruptionTest : public DBTestBase {
+ protected:
+  DBBlobCorruptionTest()
+      : DBTestBase("db_blob_corruption_test", /* env_do_fsync */ false) {}
+
+  void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) {
+    // Pick file to corrupt
+    std::vector<std::string> filenames;
+    ASSERT_OK(env_->GetChildren(dbname_, &filenames));
+    uint64_t number;
+    FileType type;
+    std::string fname;
+    uint64_t picked_number = kInvalidBlobFileNumber;
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (ParseFileName(filenames[i], &number, &type) && type == filetype &&
+          number > picked_number) {  // Pick latest file
+        fname = dbname_ + "/" + filenames[i];
+        picked_number = number;
+      }
+    }
+    ASSERT_TRUE(!fname.empty()) << filetype;
+    ASSERT_OK(test::CorruptFile(env_, fname, offset, bytes_to_corrupt));
+  }
+};
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBBlobCorruptionTest, VerifyWholeBlobFileChecksum) {
+  Options options = GetDefaultOptions();
+  options.enable_blob_files = true;
+  options.min_blob_size = 0;
+  options.create_if_missing = true;
+  options.file_checksum_gen_factory =
+      ROCKSDB_NAMESPACE::GetFileChecksumGenCrc32cFactory();
+  Reopen(options);
+
+  ASSERT_OK(Put(Slice("key_1"), Slice("blob_value_1")));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Put(Slice("key_2"), Slice("blob_value_2")));
+  ASSERT_OK(Flush());
+  ASSERT_OK(db_->VerifyFileChecksums(ReadOptions()));
+  Close();
+
+  Corrupt(kBlobFile, 0, 2);
+
+  ASSERT_OK(TryReopen(options));
+
+  int count{0};
+  SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::VerifyFullFileChecksum:mismatch", [&](void* arg) {
+        const Status* s = static_cast<Status*>(arg);
+        ASSERT_NE(s, nullptr);
+        ++count;
+        ASSERT_NOK(*s);
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsCorruption());
+  ASSERT_EQ(1, count);
+
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+#endif  // !ROCKSDB_LITE
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  RegisterCustomObjects(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_index_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_index_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/db_blob_index_test.cc	1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/db_blob_index_test.cc	2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,572 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "db/arena_wrapped_db_iter.h"
+#include "db/column_family.h"
+#include "db/db_iter.h"
+#include "db/db_test_util.h"
+#include "db/dbformat.h"
+#include "db/write_batch_internal.h"
+#include "port/port.h"
+#include "port/stack_trace.h"
+#include "util/string_util.h"
+#include "utilities/merge_operators.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// kTypeBlobIndex is a value type used by BlobDB only. The base rocksdb
+// should accept the value type on write, and report a not-supported value
+// for reads, unless the caller requests it explicitly. The base rocksdb
+// doesn't understand the format of the actual blob index (the value).
+class DBBlobIndexTest : public DBTestBase {
+ public:
+  enum Tier {
+    kMemtable = 0,
+    kImmutableMemtables = 1,
+    kL0SstFile = 2,
+    kLnSstFile = 3,
+  };
+  const std::vector<Tier> kAllTiers = {Tier::kMemtable,
+                                       Tier::kImmutableMemtables,
+                                       Tier::kL0SstFile, Tier::kLnSstFile};
+
+  DBBlobIndexTest()
+      : DBTestBase("db_blob_index_test", /*env_do_fsync=*/true) {}
+
+  ColumnFamilyHandle* cfh() { return dbfull()->DefaultColumnFamily(); }
+
+  ColumnFamilyData* cfd() {
+    return static_cast_with_check<ColumnFamilyHandleImpl>(cfh())->cfd();
+  }
+
+  Status PutBlobIndex(WriteBatch* batch, const Slice& key,
+                      const Slice& blob_index) {
+    return WriteBatchInternal::PutBlobIndex(batch, cfd()->GetID(), key,
+                                            blob_index);
+  }
+
+  Status Write(WriteBatch* batch) {
+    return dbfull()->Write(WriteOptions(), batch);
+  }
+
+  std::string GetImpl(const Slice& key, bool* is_blob_index = nullptr,
+                      const Snapshot* snapshot = nullptr) {
+    ReadOptions read_options;
+    read_options.snapshot = snapshot;
+    PinnableSlice value;
+    DBImpl::GetImplOptions get_impl_options;
+    get_impl_options.column_family = cfh();
+    get_impl_options.value = &value;
+    get_impl_options.is_blob_index = is_blob_index;
+    auto s = dbfull()->GetImpl(read_options, key, get_impl_options);
+    if (s.IsNotFound()) {
+      return "NOT_FOUND";
+    }
+    if (s.IsCorruption()) {
+      return "CORRUPTION";
+    }
+    if (s.IsNotSupported()) {
+      return "NOT_SUPPORTED";
+    }
+    if (!s.ok()) {
+      return s.ToString();
+    }
+    return value.ToString();
+  }
+
+  std::string GetBlobIndex(const Slice& key,
+                           const Snapshot* snapshot = nullptr) {
+    bool is_blob_index = false;
+    std::string value = GetImpl(key, &is_blob_index, snapshot);
+    if (!is_blob_index) {
+      return "NOT_BLOB";
+    }
+    return value;
+  }
+
+  ArenaWrappedDBIter* GetBlobIterator() {
+    return dbfull()->NewIteratorImpl(
+        ReadOptions(), cfd(), dbfull()->GetLatestSequenceNumber(),
+        nullptr /*read_callback*/, true /*expose_blob_index*/);
+  }
+
+  Options GetTestOptions() {
+    Options options;
+    options.env = CurrentOptions().env;
+    options.create_if_missing = true;
+    options.num_levels = 2;
+    options.disable_auto_compactions = true;
+    // Disable auto flushes.
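+    // (With up to ten write buffers and a merge threshold of ten, memtables
+    // accumulate instead of being flushed, which lets MoveDataTo() place the
+    // data in exactly the tier a test case asks for.)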
+    options.max_write_buffer_number = 10;
+    options.min_write_buffer_number_to_merge = 10;
+    options.merge_operator = MergeOperators::CreateStringAppendOperator();
+    return options;
+  }
+
+  void MoveDataTo(Tier tier) {
+    switch (tier) {
+      case Tier::kMemtable:
+        break;
+      case Tier::kImmutableMemtables:
+        ASSERT_OK(dbfull()->TEST_SwitchMemtable());
+        break;
+      case Tier::kL0SstFile:
+        ASSERT_OK(Flush());
+        break;
+      case Tier::kLnSstFile:
+        ASSERT_OK(Flush());
+        ASSERT_OK(Put("a", "dummy"));
+        ASSERT_OK(Put("z", "dummy"));
+        ASSERT_OK(Flush());
+        ASSERT_OK(
+            dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+#ifndef ROCKSDB_LITE
+        ASSERT_EQ("0,1", FilesPerLevel());
+#endif  // !ROCKSDB_LITE
+        break;
+    }
+  }
+};
+
+// Should be able to write kTypeBlobIndex to memtables and SST files.
+TEST_F(DBBlobIndexTest, Write) {
+  for (auto tier : kAllTiers) {
+    DestroyAndReopen(GetTestOptions());
+    for (int i = 1; i <= 5; i++) {
+      std::string index = ToString(i);
+      WriteBatch batch;
+      ASSERT_OK(PutBlobIndex(&batch, "key" + index, "blob" + index));
+      ASSERT_OK(Write(&batch));
+    }
+    MoveDataTo(tier);
+    for (int i = 1; i <= 5; i++) {
+      std::string index = ToString(i);
+      ASSERT_EQ("blob" + index, GetBlobIndex("key" + index));
+    }
+  }
+}
+
+// Note: the following test case pertains to the StackableDB-based BlobDB
+// implementation. Get should be able to return blob index if is_blob_index is
+// provided, otherwise it should return Status::NotSupported (when reading from
+// memtable) or Status::Corruption (when reading from SST). Reading from SST
+// returns Corruption because we can't differentiate between the application
+// accidentally opening the base DB of a stacked BlobDB and actual corruption
+// when using the integrated BlobDB.
+TEST_F(DBBlobIndexTest, Get) {
+  for (auto tier : kAllTiers) {
+    DestroyAndReopen(GetTestOptions());
+    WriteBatch batch;
+    ASSERT_OK(batch.Put("key", "value"));
+    ASSERT_OK(PutBlobIndex(&batch, "blob_key", "blob_index"));
+    ASSERT_OK(Write(&batch));
+    MoveDataTo(tier);
+    // Verify normal value
+    bool is_blob_index = false;
+    PinnableSlice value;
+    ASSERT_EQ("value", Get("key"));
+    ASSERT_EQ("value", GetImpl("key"));
+    ASSERT_EQ("value", GetImpl("key", &is_blob_index));
+    ASSERT_FALSE(is_blob_index);
+    // Verify blob index
+    if (tier <= kImmutableMemtables) {
+      ASSERT_TRUE(Get("blob_key", &value).IsNotSupported());
+      ASSERT_EQ("NOT_SUPPORTED", GetImpl("blob_key"));
+    } else {
+      ASSERT_TRUE(Get("blob_key", &value).IsCorruption());
+      ASSERT_EQ("CORRUPTION", GetImpl("blob_key"));
+    }
+    ASSERT_EQ("blob_index", GetImpl("blob_key", &is_blob_index));
+    ASSERT_TRUE(is_blob_index);
+  }
+}
+
+// Note: the following test case pertains to the StackableDB-based BlobDB
+// implementation. Get should NOT return Status::NotSupported or
+// Status::Corruption if the blob index is updated with a normal value. See the
+// test case above for more details.
+TEST_F(DBBlobIndexTest, Updated) {
+  for (auto tier : kAllTiers) {
+    DestroyAndReopen(GetTestOptions());
+    WriteBatch batch;
+    for (int i = 0; i < 10; i++) {
+      ASSERT_OK(PutBlobIndex(&batch, "key" + ToString(i), "blob_index"));
+    }
+    ASSERT_OK(Write(&batch));
+    // Avoid blob values from being purged.
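+    // (The snapshot taken below pins the original blob-index entries, so
+    // GetBlobIndex(key, snapshot) can still see them after every key is
+    // overwritten, merged, or deleted.)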
+    const Snapshot* snapshot = dbfull()->GetSnapshot();
+    ASSERT_OK(Put("key1", "new_value"));
+    ASSERT_OK(Merge("key2", "a"));
+    ASSERT_OK(Merge("key2", "b"));
+    ASSERT_OK(Merge("key2", "c"));
+    ASSERT_OK(Delete("key3"));
+    ASSERT_OK(SingleDelete("key4"));
+    ASSERT_OK(Delete("key5"));
+    ASSERT_OK(Merge("key5", "a"));
+    ASSERT_OK(Merge("key5", "b"));
+    ASSERT_OK(Merge("key5", "c"));
+    ASSERT_OK(dbfull()->DeleteRange(WriteOptions(), cfh(), "key6", "key9"));
+    MoveDataTo(tier);
+    for (int i = 0; i < 10; i++) {
+      ASSERT_EQ("blob_index", GetBlobIndex("key" + ToString(i), snapshot));
+    }
+    ASSERT_EQ("new_value", Get("key1"));
+    if (tier <= kImmutableMemtables) {
+      ASSERT_EQ("NOT_SUPPORTED", GetImpl("key2"));
+    } else {
+      ASSERT_EQ("CORRUPTION", GetImpl("key2"));
+    }
+    ASSERT_EQ("NOT_FOUND", Get("key3"));
+    ASSERT_EQ("NOT_FOUND", Get("key4"));
+    ASSERT_EQ("a,b,c", GetImpl("key5"));
+    for (int i = 6; i < 9; i++) {
+      ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i)));
+    }
+    ASSERT_EQ("blob_index", GetBlobIndex("key9"));
+    dbfull()->ReleaseSnapshot(snapshot);
+  }
+}
+
+// Note: the following test case pertains to the StackableDB-based BlobDB
+// implementation. When a blob iterator is used, it should set the
+// expose_blob_index flag for the underlying DBIter, and retrieve/return the
+// corresponding blob value. If a regular DBIter is created (i.e.
+// expose_blob_index is not set), it should return Status::Corruption.
+TEST_F(DBBlobIndexTest, Iterate) {
+  const std::vector<std::vector<ValueType>> data = {
+      /*00*/ {kTypeValue},
+      /*01*/ {kTypeBlobIndex},
+      /*02*/ {kTypeValue},
+      /*03*/ {kTypeBlobIndex, kTypeValue},
+      /*04*/ {kTypeValue},
+      /*05*/ {kTypeValue, kTypeBlobIndex},
+      /*06*/ {kTypeValue},
+      /*07*/ {kTypeDeletion, kTypeBlobIndex},
+      /*08*/ {kTypeValue},
+      /*09*/ {kTypeSingleDeletion, kTypeBlobIndex},
+      /*10*/ {kTypeValue},
+      /*11*/ {kTypeMerge, kTypeMerge, kTypeMerge, kTypeBlobIndex},
+      /*12*/ {kTypeValue},
+      /*13*/
+      {kTypeMerge, kTypeMerge, kTypeMerge, kTypeDeletion, kTypeBlobIndex},
+      /*14*/ {kTypeValue},
+      /*15*/ {kTypeBlobIndex},
+      /*16*/ {kTypeValue},
+  };
+
+  auto get_key = [](int index) {
+    char buf[20];
+    snprintf(buf, sizeof(buf), "%02d", index);
+    return "key" + std::string(buf);
+  };
+
+  auto get_value = [&](int index, int version) {
+    return get_key(index) + "_value" + ToString(version);
+  };
+
+  auto check_iterator = [&](Iterator* iterator, Status::Code expected_status,
+                            const Slice& expected_value) {
+    ASSERT_EQ(expected_status, iterator->status().code());
+    if (expected_status == Status::kOk) {
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ(expected_value, iterator->value());
+    } else {
+      ASSERT_FALSE(iterator->Valid());
+    }
+  };
+
+  auto create_normal_iterator = [&]() -> Iterator* {
+    return dbfull()->NewIterator(ReadOptions());
+  };
+
+  auto create_blob_iterator = [&]() -> Iterator* { return GetBlobIterator(); };
+
+  auto check_is_blob = [&](bool is_blob) {
+    return [is_blob](Iterator* iterator) {
+      ASSERT_EQ(is_blob,
+                reinterpret_cast<ArenaWrappedDBIter*>(iterator)->IsBlob());
+    };
+  };
+
+  auto verify = [&](int index, Status::Code expected_status,
+                    const Slice& forward_value, const Slice& backward_value,
+                    std::function<Iterator*()> create_iterator,
+                    std::function<void(Iterator*)> extra_check = nullptr) {
+    // Seek
+    auto* iterator = create_iterator();
+    ASSERT_OK(iterator->status());
+    ASSERT_OK(iterator->Refresh());
+    iterator->Seek(get_key(index));
+    check_iterator(iterator, expected_status, forward_value);
+    if (extra_check) {
+      extra_check(iterator);
+    }
+    delete iterator;
+
+    // Next
+    iterator = create_iterator();
+    ASSERT_OK(iterator->Refresh());
+    iterator->Seek(get_key(index - 1));
+    ASSERT_TRUE(iterator->Valid());
+    ASSERT_OK(iterator->status());
+    iterator->Next();
+    check_iterator(iterator, expected_status, forward_value);
+    if (extra_check) {
+      extra_check(iterator);
+    }
+    delete iterator;
+
+    // SeekForPrev
+    iterator = create_iterator();
+    ASSERT_OK(iterator->status());
+    ASSERT_OK(iterator->Refresh());
+    iterator->SeekForPrev(get_key(index));
+    check_iterator(iterator, expected_status, backward_value);
+    if (extra_check) {
+      extra_check(iterator);
+    }
+    delete iterator;
+
+    // Prev
+    iterator = create_iterator();
+    iterator->Seek(get_key(index + 1));
+    ASSERT_TRUE(iterator->Valid());
+    ASSERT_OK(iterator->status());
+    iterator->Prev();
+    check_iterator(iterator, expected_status, backward_value);
+    if (extra_check) {
+      extra_check(iterator);
+    }
+    delete iterator;
+  };
+
+  for (auto tier : {Tier::kMemtable} /*kAllTiers*/) {
+    // Avoid values from being purged.
+    std::vector<const Snapshot*> snapshots;
+    DestroyAndReopen(GetTestOptions());
+
+    // fill data
+    for (int i = 0; i < static_cast<int>(data.size()); i++) {
+      for (int j = static_cast<int>(data[i].size()) - 1; j >= 0; j--) {
+        std::string key = get_key(i);
+        std::string value = get_value(i, j);
+        WriteBatch batch;
+        switch (data[i][j]) {
+          case kTypeValue:
+            ASSERT_OK(Put(key, value));
+            break;
+          case kTypeDeletion:
+            ASSERT_OK(Delete(key));
+            break;
+          case kTypeSingleDeletion:
+            ASSERT_OK(SingleDelete(key));
+            break;
+          case kTypeMerge:
+            ASSERT_OK(Merge(key, value));
+            break;
+          case kTypeBlobIndex:
+            ASSERT_OK(PutBlobIndex(&batch, key, value));
+            ASSERT_OK(Write(&batch));
+            break;
+          default:
+            FAIL();
+        };
+      }
+      snapshots.push_back(dbfull()->GetSnapshot());
+    }
+    ASSERT_OK(
+        dbfull()->DeleteRange(WriteOptions(), cfh(), get_key(15), get_key(16)));
+    snapshots.push_back(dbfull()->GetSnapshot());
+    MoveDataTo(tier);
+
+    // Normal iterator
+    verify(1, Status::kCorruption, "", "", create_normal_iterator);
+    verify(3, Status::kCorruption, "", "", create_normal_iterator);
+    verify(5, Status::kOk, get_value(5, 0), get_value(5, 0),
+           create_normal_iterator);
+    verify(7, Status::kOk, get_value(8, 0), get_value(6, 0),
+           create_normal_iterator);
+    verify(9, Status::kOk, get_value(10, 0), get_value(8, 0),
+           create_normal_iterator);
+    verify(11, Status::kCorruption, "", "", create_normal_iterator);
+    verify(13, Status::kOk,
+           get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0),
+           get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0),
+           create_normal_iterator);
+    verify(15, Status::kOk, get_value(16, 0), get_value(14, 0),
+           create_normal_iterator);
+
+    // Iterator with blob support
+    verify(1, Status::kOk, get_value(1, 0), get_value(1, 0),
+           create_blob_iterator, check_is_blob(true));
+    verify(3, Status::kOk, get_value(3, 0), get_value(3, 0),
+           create_blob_iterator, check_is_blob(true));
+    verify(5, Status::kOk, get_value(5, 0), get_value(5, 0),
+           create_blob_iterator, check_is_blob(false));
+    verify(7, Status::kOk, get_value(8, 0), get_value(6, 0),
+           create_blob_iterator, check_is_blob(false));
+    verify(9, Status::kOk, get_value(10, 0), get_value(8, 0),
+           create_blob_iterator, check_is_blob(false));
+    if (tier <= kImmutableMemtables) {
+      verify(11, Status::kNotSupported, "", "", create_blob_iterator);
+    } else {
+      verify(11, Status::kCorruption, "", "", create_blob_iterator);
+    }
+    verify(13, Status::kOk,
+           get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0),
+           get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0),
+           create_blob_iterator, check_is_blob(false));
+    verify(15, Status::kOk, get_value(16, 0), get_value(14, 0),
+           create_blob_iterator, check_is_blob(false));
+
+#ifndef ROCKSDB_LITE
+    // Iterator with blob support and using seek.
+    ASSERT_OK(dbfull()->SetOptions(
+        cfh(), {{"max_sequential_skip_in_iterations", "0"}}));
+    verify(1, Status::kOk, get_value(1, 0), get_value(1, 0),
+           create_blob_iterator, check_is_blob(true));
+    verify(3, Status::kOk, get_value(3, 0), get_value(3, 0),
+           create_blob_iterator, check_is_blob(true));
+    verify(5, Status::kOk, get_value(5, 0), get_value(5, 0),
+           create_blob_iterator, check_is_blob(false));
+    verify(7, Status::kOk, get_value(8, 0), get_value(6, 0),
+           create_blob_iterator, check_is_blob(false));
+    verify(9, Status::kOk, get_value(10, 0), get_value(8, 0),
+           create_blob_iterator, check_is_blob(false));
+    if (tier <= kImmutableMemtables) {
+      verify(11, Status::kNotSupported, "", "", create_blob_iterator);
+    } else {
+      verify(11, Status::kCorruption, "", "", create_blob_iterator);
+    }
+    verify(13, Status::kOk,
+           get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0),
+           get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0),
+           create_blob_iterator, check_is_blob(false));
+    verify(15, Status::kOk, get_value(16, 0), get_value(14, 0),
+           create_blob_iterator, check_is_blob(false));
+#endif  // !ROCKSDB_LITE
+
+    for (auto* snapshot : snapshots) {
+      dbfull()->ReleaseSnapshot(snapshot);
+    }
+  }
+}
+
+TEST_F(DBBlobIndexTest, IntegratedBlobIterate) {
+  const std::vector<std::vector<std::string>> data = {
+      /*00*/ {"Put"},
+      /*01*/ {"Put", "Merge", "Merge", "Merge"},
+      /*02*/ {"Put"}};
+
+  auto get_key = [](size_t index) { return ("key" + std::to_string(index)); };
+
+  auto get_value = [&](size_t index, size_t version) {
+    return get_key(index) + "_value" + ToString(version);
+  };
+
+  auto check_iterator = [&](Iterator* iterator, Status expected_status,
+                            const Slice& expected_value) {
+    ASSERT_EQ(expected_status, iterator->status());
+    if (expected_status.ok()) {
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ(expected_value, iterator->value());
+    } else {
+      ASSERT_FALSE(iterator->Valid());
+    }
+  };
+
+  auto verify = [&](size_t index, Status expected_status,
+                    const Slice& expected_value) {
+    // Seek
+    {
+      Iterator* iterator = db_->NewIterator(ReadOptions());
+      std::unique_ptr<Iterator> iterator_guard(iterator);
+      ASSERT_OK(iterator->status());
+      ASSERT_OK(iterator->Refresh());
+      iterator->Seek(get_key(index));
+      check_iterator(iterator, expected_status, expected_value);
+    }
+    // Next
+    {
+      Iterator* iterator = db_->NewIterator(ReadOptions());
+      std::unique_ptr<Iterator> iterator_guard(iterator);
+      ASSERT_OK(iterator->Refresh());
+      iterator->Seek(get_key(index - 1));
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_OK(iterator->status());
+      iterator->Next();
+      check_iterator(iterator, expected_status, expected_value);
+    }
+    // SeekForPrev
+    {
+      Iterator* iterator = db_->NewIterator(ReadOptions());
+      std::unique_ptr<Iterator> iterator_guard(iterator);
+      ASSERT_OK(iterator->status());
+      ASSERT_OK(iterator->Refresh());
+      iterator->SeekForPrev(get_key(index));
+      check_iterator(iterator, expected_status, expected_value);
+    }
+    // Prev
+    {
+      Iterator* iterator = db_->NewIterator(ReadOptions());
+      std::unique_ptr<Iterator> iterator_guard(iterator);
+      iterator->Seek(get_key(index + 1));
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_OK(iterator->status());
+      iterator->Prev();
+      check_iterator(iterator, expected_status, expected_value);
+    }
+  };
options.min_blob_size = 0; + + DestroyAndReopen(options); + + // fill data + for (size_t i = 0; i < data.size(); i++) { + for (size_t j = 0; j < data[i].size(); j++) { + std::string key = get_key(i); + std::string value = get_value(i, j); + if (data[i][j] == "Put") { + ASSERT_OK(Put(key, value)); + ASSERT_OK(Flush()); + } else if (data[i][j] == "Merge") { + ASSERT_OK(Merge(key, value)); + ASSERT_OK(Flush()); + } + } + } + + std::string expected_value = get_value(1, 0) + "," + get_value(1, 1) + "," + + get_value(1, 2) + "," + get_value(1, 3); + Status expected_status; + verify(1, expected_status, expected_value); + +#ifndef ROCKSDB_LITE + // Test DBIter::FindValueForCurrentKeyUsingSeek flow. + ASSERT_OK(dbfull()->SetOptions(cfh(), + {{"max_sequential_skip_in_iterations", "0"}})); + verify(1, expected_status, expected_value); +#endif // !ROCKSDB_LITE +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + RegisterCustomObjects(argc, argv); + return RUN_ALL_TESTS(); +} diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,21 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/blob/prefetch_buffer_collection.h" + +namespace ROCKSDB_NAMESPACE { + +FilePrefetchBuffer* PrefetchBufferCollection::GetOrCreatePrefetchBuffer( + uint64_t file_number) { + auto& prefetch_buffer = prefetch_buffers_[file_number]; + if (!prefetch_buffer) { + prefetch_buffer.reset( + new FilePrefetchBuffer(readahead_size_, readahead_size_)); + } + + return prefetch_buffer.get(); +} + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,38 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "file/file_prefetch_buffer.h" +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// A class that owns a collection of FilePrefetchBuffers using the file number +// as key. Used for implementing compaction readahead for blob files. Designed +// to be accessed by a single thread only: every (sub)compaction needs its own +// buffers since they are guaranteed to read different blobs from different +// positions even when reading the same file. 
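The two tests above exercise the integrated BlobDB path, where blob storage is switched on per column family through ordinary options (enable_blob_files, min_blob_size) rather than the old StackableDB wrapper. A minimal sketch of that public API, assuming RocksDB 6.18 or later; the path and thresholds below are illustrative, not taken from the patch:

    // Open a DB that spills large values into blob files (illustrative).
    #include <cassert>
    #include <string>
    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      options.enable_blob_files = true;    // write large values to blob files
      options.min_blob_size = 4096;        // values >= 4 KiB become blobs
      options.blob_file_size = 256 << 20;  // target blob file size, 256 MiB
      rocksdb::DB* db = nullptr;
      rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/blobdb_demo", &db);
      assert(s.ok());
      // An 8 KiB value exceeds min_blob_size, so it lands in a blob file.
      s = db->Put(rocksdb::WriteOptions(), "key", std::string(8192, 'x'));
      assert(s.ok());
      delete db;
      return 0;
    }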
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,21 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#include "db/blob/prefetch_buffer_collection.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+FilePrefetchBuffer* PrefetchBufferCollection::GetOrCreatePrefetchBuffer(
+    uint64_t file_number) {
+  auto& prefetch_buffer = prefetch_buffers_[file_number];
+  if (!prefetch_buffer) {
+    prefetch_buffer.reset(
+        new FilePrefetchBuffer(readahead_size_, readahead_size_));
+  }
+
+  return prefetch_buffer.get();
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h 1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h 2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,38 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <unordered_map>
+
+#include "file/file_prefetch_buffer.h"
+#include "rocksdb/rocksdb_namespace.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A class that owns a collection of FilePrefetchBuffers using the file number
+// as key. Used for implementing compaction readahead for blob files. Designed
+// to be accessed by a single thread only: every (sub)compaction needs its own
+// buffers since they are guaranteed to read different blobs from different
+// positions even when reading the same file.
+class PrefetchBufferCollection {
+ public:
+  explicit PrefetchBufferCollection(uint64_t readahead_size)
+      : readahead_size_(readahead_size) {
+    assert(readahead_size_ > 0);
+  }
+
+  FilePrefetchBuffer* GetOrCreatePrefetchBuffer(uint64_t file_number);
+
+ private:
+  uint64_t readahead_size_;
+  std::unordered_map<uint64_t, std::unique_ptr<FilePrefetchBuffer>>
+      prefetch_buffers_;  // maps file number to prefetch buffer
+};
+
+}  // namespace ROCKSDB_NAMESPACE
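A sketch of how a (sub)compaction could consume this class, one FilePrefetchBuffer per blob file number; this uses only the internal API visible in the header above and is illustrative, not part of the patch:

    // Illustrative only: repeated lookups for the same blob file return the
    // same buffer, so sequential reads within one file benefit from readahead.
    #include "db/blob/prefetch_buffer_collection.h"

    namespace ROCKSDB_NAMESPACE {

    FilePrefetchBuffer* BufferForBlobFile(PrefetchBufferCollection& buffers,
                                          uint64_t blob_file_number) {
      // The collection lazily creates a buffer the first time a file number
      // is seen and reuses it afterwards.
      return buffers.GetOrCreatePrefetchBuffer(blob_file_number);
    }

    }  // namespace ROCKSDB_NAMESPACE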
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob_index.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob_index.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/blob_index.h 2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/blob_index.h 1970-01-01 00:00:00.000000000 +0000
@@ -1,179 +0,0 @@
-// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
-// This source code is licensed under both the GPLv2 (found in the
-// COPYING file in the root directory) and Apache 2.0 License
-// (found in the LICENSE.Apache file in the root directory).
-#pragma once
-#ifndef ROCKSDB_LITE
-
-#include <sstream>
-#include <string>
-
-#include "rocksdb/options.h"
-#include "util/coding.h"
-#include "util/string_util.h"
-
-namespace ROCKSDB_NAMESPACE {
-
-// BlobIndex is a pointer to the blob and metadata of the blob. The index is
-// stored in base DB as ValueType::kTypeBlobIndex.
-// There are three types of blob index:
-//
-//    kInlinedTTL:
-//      +------+------------+---------------+
-//      | type | expiration | value         |
-//      +------+------------+---------------+
-//      | char | varint64   | variable size |
-//      +------+------------+---------------+
-//
-//    kBlob:
-//      +------+-------------+----------+----------+-------------+
-//      | type | file number | offset   | size     | compression |
-//      +------+-------------+----------+----------+-------------+
-//      | char | varint64    | varint64 | varint64 | char        |
-//      +------+-------------+----------+----------+-------------+
-//
-//    kBlobTTL:
-//      +------+------------+-------------+----------+----------+-------------+
-//      | type | expiration | file number | offset   | size     | compression |
-//      +------+------------+-------------+----------+----------+-------------+
-//      | char | varint64   | varint64    | varint64 | varint64 | char        |
-//      +------+------------+-------------+----------+----------+-------------+
-//
-// There isn't a kInlined (without TTL) type since we can store it as a plain
-// value (i.e. ValueType::kTypeValue).
-class BlobIndex {
- public:
-  enum class Type : unsigned char {
-    kInlinedTTL = 0,
-    kBlob = 1,
-    kBlobTTL = 2,
-    kUnknown = 3,
-  };
-
-  BlobIndex() : type_(Type::kUnknown) {}
-
-  bool IsInlined() const { return type_ == Type::kInlinedTTL; }
-
-  bool HasTTL() const {
-    return type_ == Type::kInlinedTTL || type_ == Type::kBlobTTL;
-  }
-
-  uint64_t expiration() const {
-    assert(HasTTL());
-    return expiration_;
-  }
-
-  const Slice& value() const {
-    assert(IsInlined());
-    return value_;
-  }
-
-  uint64_t file_number() const {
-    assert(!IsInlined());
-    return file_number_;
-  }
-
-  uint64_t offset() const {
-    assert(!IsInlined());
-    return offset_;
-  }
-
-  uint64_t size() const {
-    assert(!IsInlined());
-    return size_;
-  }
-
-  Status DecodeFrom(Slice slice) {
-    static const std::string kErrorMessage = "Error while decoding blob index";
-    assert(slice.size() > 0);
-    type_ = static_cast<Type>(*slice.data());
-    if (type_ >= Type::kUnknown) {
-      return Status::Corruption(
-          kErrorMessage,
-          "Unknown blob index type: " + ToString(static_cast<char>(type_)));
-    }
-    slice = Slice(slice.data() + 1, slice.size() - 1);
-    if (HasTTL()) {
-      if (!GetVarint64(&slice, &expiration_)) {
-        return Status::Corruption(kErrorMessage, "Corrupted expiration");
-      }
-    }
-    if (IsInlined()) {
-      value_ = slice;
-    } else {
-      if (GetVarint64(&slice, &file_number_) && GetVarint64(&slice, &offset_) &&
-          GetVarint64(&slice, &size_) && slice.size() == 1) {
-        compression_ = static_cast<CompressionType>(*slice.data());
-      } else {
-        return Status::Corruption(kErrorMessage, "Corrupted blob offset");
-      }
-    }
-    return Status::OK();
-  }
-
-  std::string DebugString(bool output_hex) const {
-    std::ostringstream oss;
-
-    if (IsInlined()) {
-      oss << "[inlined blob] value:" << value_.ToString(output_hex);
-    } else {
-      oss << "[blob ref] file:" << file_number_ << " offset:" << offset_
-          << " size:" << size_;
-    }
-
-    if (HasTTL()) {
-      oss << " exp:" << expiration_;
-    }
-
-    return oss.str();
-  }
-
-  static void EncodeInlinedTTL(std::string* dst, uint64_t expiration,
-                               const Slice& value) {
-    assert(dst != nullptr);
-    dst->clear();
-    dst->reserve(1 + kMaxVarint64Length + value.size());
-    dst->push_back(static_cast<char>(Type::kInlinedTTL));
-    PutVarint64(dst, expiration);
-    dst->append(value.data(), value.size());
-  }
-
-  static void EncodeBlob(std::string* dst, uint64_t file_number,
-                         uint64_t offset, uint64_t size,
-                         CompressionType compression) {
-    assert(dst != nullptr);
-    dst->clear();
-    dst->reserve(kMaxVarint64Length * 3 + 2);
-    dst->push_back(static_cast<char>(Type::kBlob));
-    PutVarint64(dst, file_number);
-    PutVarint64(dst, offset);
-    PutVarint64(dst, size);
-    dst->push_back(static_cast<char>(compression));
-  }
-
-  static void EncodeBlobTTL(std::string* dst, uint64_t expiration,
-                            uint64_t file_number, uint64_t offset,
-                            uint64_t size, CompressionType compression) {
-    assert(dst != nullptr);
-    dst->clear();
-    dst->reserve(kMaxVarint64Length * 4 + 2);
-    dst->push_back(static_cast<char>(Type::kBlobTTL));
-    PutVarint64(dst, expiration);
-    PutVarint64(dst, file_number);
-    PutVarint64(dst, offset);
-    PutVarint64(dst, size);
-    dst->push_back(static_cast<char>(compression));
-  }
-
- private:
-  Type type_ = Type::kUnknown;
-  uint64_t expiration_ = 0;
-  Slice value_;
-  uint64_t file_number_ = 0;
-  uint64_t offset_ = 0;
-  uint64_t size_ = 0;
-  CompressionType compression_ = kNoCompression;
-};
-
-}  // namespace ROCKSDB_NAMESPACE
-#endif  // ROCKSDB_LITE
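The header removed above documents the on-disk blob index encoding (the definition itself does not disappear; in current RocksDB an equivalent lives under db/blob/). As a worked example of the kBlob layout, using the encoder shown above with file_number=5, offset=300, size=1000 and kNoCompression:

    std::string dst;
    BlobIndex::EncodeBlob(&dst, /*file_number=*/5, /*offset=*/300,
                          /*size=*/1000, kNoCompression);
    // dst now holds 7 bytes:
    //   0x01       type byte: Type::kBlob
    //   0x05       file number 5 (varint64, one byte)
    //   0xAC 0x02  offset 300 (varint64: 0x2C | 0x80, then 0x02)
    //   0xE8 0x07  size 1000 (varint64: 0x68 | 0x80, then 0x07)
    //   0x00       compression: kNoCompression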
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/builder.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/builder.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/builder.cc 2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/builder.cc 2025-05-19 16:14:27.000000000 +0000
@@ -13,19 +13,22 @@
 #include <deque>
 #include <vector>
+#include "db/blob/blob_file_builder.h"
 #include "db/compaction/compaction_iterator.h"
-#include "db/dbformat.h"
 #include "db/event_helpers.h"
 #include "db/internal_stats.h"
 #include "db/merge_helper.h"
+#include "db/output_validator.h"
 #include "db/range_del_aggregator.h"
 #include "db/table_cache.h"
 #include "db/version_edit.h"
+#include "file/file_util.h"
 #include "file/filename.h"
 #include "file/read_write_util.h"
 #include "file/writable_file_writer.h"
 #include "monitoring/iostats_context_imp.h"
 #include "monitoring/thread_status_util.h"
+#include "options/options_helper.h"
 #include "rocksdb/db.h"
 #include "rocksdb/env.h"
 #include "rocksdb/iterator.h"
@@ -41,125 +44,172 @@
 
 class TableFactory;
 
-TableBuilder* NewTableBuilder(
-    const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions,
-    const InternalKeyComparator& internal_comparator,
-    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
-        int_tbl_prop_collector_factories,
-    uint32_t column_family_id, const std::string& column_family_name,
-    WritableFileWriter* file, const CompressionType compression_type,
-    uint64_t sample_for_compression, const CompressionOptions& compression_opts,
-    int level, const bool skip_filters, const uint64_t creation_time,
-    const uint64_t oldest_key_time, const uint64_t target_file_size,
-    const uint64_t file_creation_time) {
-  assert((column_family_id ==
+TableBuilder* NewTableBuilder(const TableBuilderOptions& tboptions,
+                              WritableFileWriter* file) {
+  assert((tboptions.column_family_id ==
          TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
-         column_family_name.empty());
-  return ioptions.table_factory->NewTableBuilder(
-      TableBuilderOptions(ioptions, moptions, internal_comparator,
-                          int_tbl_prop_collector_factories, compression_type,
-                          sample_for_compression, compression_opts,
-                          skip_filters, column_family_name, level,
-                          creation_time, oldest_key_time, target_file_size,
-                          file_creation_time),
-      column_family_id, file);
+         tboptions.column_family_name.empty());
+  return tboptions.ioptions.table_factory->NewTableBuilder(tboptions, file);
 }
 
 Status BuildTable(
-    const std::string& dbname, Env* env, FileSystem* fs,
-    const ImmutableCFOptions& ioptions,
-    const MutableCFOptions& mutable_cf_options, const FileOptions& file_options,
-    TableCache* table_cache, InternalIterator* iter,
+    const std::string& dbname, VersionSet* versions,
+    const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions,
+    const FileOptions& file_options, TableCache* table_cache,
+    InternalIterator* iter,
     std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
        range_del_iters,
-    FileMetaData* meta, const InternalKeyComparator& internal_comparator,
-    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
-        int_tbl_prop_collector_factories,
-    uint32_t column_family_id, const std::string& column_family_name,
+    FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
     std::vector<SequenceNumber> snapshots,
    SequenceNumber earliest_write_conflict_snapshot,
-    SnapshotChecker* snapshot_checker, const CompressionType compression,
-    uint64_t sample_for_compression, const CompressionOptions& compression_opts,
-    bool paranoid_file_checks, InternalStats* internal_stats,
-    TableFileCreationReason reason, EventLogger* event_logger, int job_id,
-    const Env::IOPriority io_priority, TableProperties* table_properties,
-    int level, const uint64_t creation_time, const uint64_t oldest_key_time,
-    Env::WriteLifeTimeHint write_hint, const uint64_t file_creation_time) {
-  assert((column_family_id ==
+    SnapshotChecker* snapshot_checker, bool paranoid_file_checks,
+    InternalStats* internal_stats, IOStatus* io_status,
+    const std::shared_ptr<IOTracer>& io_tracer,
+    BlobFileCreationReason blob_creation_reason, EventLogger* event_logger,
+    int job_id, const Env::IOPriority io_priority,
+    TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
+    const std::string* full_history_ts_low,
+    BlobFileCompletionCallback* blob_callback, uint64_t* num_input_entries,
+    uint64_t* memtable_payload_bytes, uint64_t* memtable_garbage_bytes) {
+  assert((tboptions.column_family_id ==
          TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
-         column_family_name.empty());
+         tboptions.column_family_name.empty());
+  auto& mutable_cf_options = tboptions.moptions;
+  auto& ioptions = tboptions.ioptions;
   // Reports the IOStats for flush for every following bytes.
   const size_t kReportFlushIOStatsEvery = 1048576;
+  OutputValidator output_validator(
+      tboptions.internal_comparator,
+      /*enable_order_check=*/
+      mutable_cf_options.check_flush_compaction_key_order,
+      /*enable_hash=*/paranoid_file_checks);
   Status s;
   meta->fd.file_size = 0;
   iter->SeekToFirst();
   std::unique_ptr<CompactionRangeDelAggregator> range_del_agg(
-      new CompactionRangeDelAggregator(&internal_comparator, snapshots));
+      new CompactionRangeDelAggregator(&tboptions.internal_comparator,
+                                       snapshots));
+  uint64_t num_unfragmented_tombstones = 0;
+  uint64_t total_tombstone_payload_bytes = 0;
   for (auto& range_del_iter : range_del_iters) {
+    num_unfragmented_tombstones +=
+        range_del_iter->num_unfragmented_tombstones();
+    total_tombstone_payload_bytes +=
+        range_del_iter->total_tombstone_payload_bytes();
     range_del_agg->AddTombstones(std::move(range_del_iter));
   }
 
   std::string fname = TableFileName(ioptions.cf_paths, meta->fd.GetNumber(),
                                     meta->fd.GetPathId());
+  std::vector<std::string> blob_file_paths;
+  std::string file_checksum = kUnknownFileChecksum;
+  std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
 #ifndef ROCKSDB_LITE
-  EventHelpers::NotifyTableFileCreationStarted(
-      ioptions.listeners, dbname, column_family_name, fname, job_id, reason);
+  EventHelpers::NotifyTableFileCreationStarted(ioptions.listeners, dbname,
+                                               tboptions.column_family_name,
+                                               fname, job_id, tboptions.reason);
 #endif  // !ROCKSDB_LITE
-  TableProperties tp;
+  Env* env = db_options.env;
+  assert(env);
+  FileSystem* fs = db_options.fs.get();
+  assert(fs);
 
+  TableProperties tp;
   if (iter->Valid() || !range_del_agg->IsEmpty()) {
+    std::unique_ptr<CompactionFilter> compaction_filter;
+    if (ioptions.compaction_filter_factory != nullptr &&
+        ioptions.compaction_filter_factory->ShouldFilterTableFileCreation(
+            tboptions.reason)) {
+      CompactionFilter::Context context;
+      context.is_full_compaction = false;
+      context.is_manual_compaction = false;
+      context.column_family_id = tboptions.column_family_id;
+      context.reason = tboptions.reason;
+      compaction_filter =
+          ioptions.compaction_filter_factory->CreateCompactionFilter(context);
+      if (compaction_filter != nullptr &&
+          !compaction_filter->IgnoreSnapshots()) {
+        s.PermitUncheckedError();
+        return Status::NotSupported(
+            "CompactionFilter::IgnoreSnapshots() = false is not supported "
+            "anymore.");
+      }
+    }
+
     TableBuilder* builder;
     std::unique_ptr<WritableFileWriter> file_writer;
-    // Currently we only enable dictionary compression during compaction to the
-    // bottommost level.
-    CompressionOptions compression_opts_for_flush(compression_opts);
-    compression_opts_for_flush.max_dict_bytes = 0;
-    compression_opts_for_flush.zstd_max_train_bytes = 0;
     {
       std::unique_ptr<FSWritableFile> file;
 #ifndef NDEBUG
       bool use_direct_writes = file_options.use_direct_writes;
       TEST_SYNC_POINT_CALLBACK("BuildTable:create_file", &use_direct_writes);
 #endif  // !NDEBUG
-      s = NewWritableFile(fs, fname, &file, file_options);
+      IOStatus io_s = NewWritableFile(fs, fname, &file, file_options);
+      assert(s.ok());
+      s = io_s;
+      if (io_status->ok()) {
+        *io_status = io_s;
+      }
       if (!s.ok()) {
         EventHelpers::LogAndNotifyTableFileCreationFinished(
-            event_logger, ioptions.listeners, dbname, column_family_name, fname,
-            job_id, meta->fd, kInvalidBlobFileNumber, tp, reason, s);
+            event_logger, ioptions.listeners, dbname,
+            tboptions.column_family_name, fname, job_id, meta->fd,
+            kInvalidBlobFileNumber, tp, tboptions.reason, s, file_checksum,
+            file_checksum_func_name);
         return s;
       }
+      FileTypeSet tmp_set = ioptions.checksum_handoff_file_types;
       file->SetIOPriority(io_priority);
       file->SetWriteLifeTimeHint(write_hint);
-      file_writer.reset(new WritableFileWriter(
-          std::move(file), fname, file_options, env, ioptions.statistics,
-          ioptions.listeners, ioptions.sst_file_checksum_func));
+      file_writer.reset(new WritableFileWriter(
+          std::move(file), fname, file_options, ioptions.clock, io_tracer,
+          ioptions.stats, ioptions.listeners,
+          ioptions.file_checksum_gen_factory.get(),
+          tmp_set.Contains(FileType::kTableFile), false));
+
+      builder = NewTableBuilder(tboptions, file_writer.get());
     }
-    builder = NewTableBuilder(
-        ioptions, mutable_cf_options, internal_comparator,
-        int_tbl_prop_collector_factories, column_family_id,
-        column_family_name, file_writer.get(), compression,
-        sample_for_compression, compression_opts_for_flush, level,
-        false /* skip_filters */, creation_time, oldest_key_time,
-        0 /*target_file_size*/, file_creation_time);
-  }
 
-    MergeHelper merge(env, internal_comparator.user_comparator(),
-                      ioptions.merge_operator, nullptr, ioptions.info_log,
-                      true /* internal key corruption is not ok */,
-                      snapshots.empty() ? 0 : snapshots.back(),
-                      snapshot_checker);
+    MergeHelper merge(
+        env, tboptions.internal_comparator.user_comparator(),
+        ioptions.merge_operator.get(), compaction_filter.get(), ioptions.logger,
+        true /* internal key corruption is not ok */,
+        snapshots.empty() ? 0 : snapshots.back(), snapshot_checker);
+
+    std::unique_ptr<BlobFileBuilder> blob_file_builder(
+        (mutable_cf_options.enable_blob_files && blob_file_additions)
+            ? new BlobFileBuilder(
+                  versions, fs, &ioptions, &mutable_cf_options, &file_options,
+                  job_id, tboptions.column_family_id,
+                  tboptions.column_family_name, io_priority, write_hint,
+                  io_tracer, blob_callback, blob_creation_reason,
+                  &blob_file_paths, blob_file_additions)
            : nullptr);
 
     CompactionIterator c_iter(
-        iter, internal_comparator.user_comparator(), &merge, kMaxSequenceNumber,
-        &snapshots, earliest_write_conflict_snapshot, snapshot_checker, env,
-        ShouldReportDetailedTime(env, ioptions.statistics),
-        true /* internal key corruption is not ok */, range_del_agg.get());
+        iter, tboptions.internal_comparator.user_comparator(), &merge,
+        kMaxSequenceNumber, &snapshots, earliest_write_conflict_snapshot,
+        snapshot_checker, env, ShouldReportDetailedTime(env, ioptions.stats),
+        true /* internal key corruption is not ok */, range_del_agg.get(),
+        blob_file_builder.get(), ioptions.allow_data_in_errors,
+        /*compaction=*/nullptr, compaction_filter.get(),
+        /*shutting_down=*/nullptr,
+        /*preserve_deletes_seqnum=*/0, /*manual_compaction_paused=*/nullptr,
+        /*manual_compaction_canceled=*/nullptr, db_options.info_log,
+        full_history_ts_low);
+
     c_iter.SeekToFirst();
     for (; c_iter.Valid(); c_iter.Next()) {
       const Slice& key = c_iter.key();
       const Slice& value = c_iter.value();
       const ParsedInternalKey& ikey = c_iter.ikey();
+      // Generate a rolling 64-bit hash of the key and values
+      // Note :
+      // Here "key" integrates 'sequence_number'+'kType'+'user key'.
+      s = output_validator.Add(key, value);
+      if (!s.ok()) {
+        break;
+      }
       builder->Add(key, value);
       meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type);
 
@@ -170,26 +220,39 @@
             ThreadStatus::FLUSH_BYTES_WRITTEN, IOSTATS(bytes_written));
       }
     }
+    if (!s.ok()) {
+      c_iter.status().PermitUncheckedError();
+    } else if (!c_iter.status().ok()) {
+      s = c_iter.status();
+    }
 
-    auto range_del_it = range_del_agg->NewIterator();
-    for (range_del_it->SeekToFirst(); range_del_it->Valid();
-         range_del_it->Next()) {
-      auto tombstone = range_del_it->Tombstone();
-      auto kv = tombstone.Serialize();
-      builder->Add(kv.first.Encode(), kv.second);
-      meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
-                                     tombstone.seq_, internal_comparator);
+    if (s.ok()) {
+      auto range_del_it = range_del_agg->NewIterator();
+      for (range_del_it->SeekToFirst(); range_del_it->Valid();
+           range_del_it->Next()) {
+        auto tombstone = range_del_it->Tombstone();
+        auto kv = tombstone.Serialize();
+        builder->Add(kv.first.Encode(), kv.second);
+        meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
+                                       tombstone.seq_,
+                                       tboptions.internal_comparator);
+      }
     }
 
-    // Finish and check for builder errors
-    tp = builder->GetTableProperties();
-    bool empty = builder->NumEntries() == 0 && tp.num_range_deletions == 0;
-    s = c_iter.status();
+    TEST_SYNC_POINT("BuildTable:BeforeFinishBuildTable");
+    const bool empty = builder->IsEmpty();
+    if (num_input_entries != nullptr) {
+      *num_input_entries =
+          c_iter.num_input_entry_scanned() + num_unfragmented_tombstones;
+    }
     if (!s.ok() || empty) {
       builder->Abandon();
     } else {
       s = builder->Finish();
     }
+    if (io_status->ok()) {
+      *io_status = builder->io_status();
+    }
 
     if (s.ok() && !empty) {
       uint64_t file_size = builder->FileSize();
@@ -197,24 +260,64 @@
       meta->marked_for_compaction = builder->NeedCompact();
       assert(meta->fd.GetFileSize() > 0);
       tp = builder->GetTableProperties(); // refresh now that builder is finished
+      if (memtable_payload_bytes != nullptr &&
+          memtable_garbage_bytes != nullptr) {
+        const CompactionIterationStats& ci_stats = c_iter.iter_stats();
+        uint64_t total_payload_bytes = ci_stats.total_input_raw_key_bytes +
+                                       ci_stats.total_input_raw_value_bytes +
+                                       total_tombstone_payload_bytes;
+        uint64_t total_payload_bytes_written =
+            (tp.raw_key_size + tp.raw_value_size);
+        // Prevent underflow, which may still happen at this point
+        // since we only support inserts, deletes, and deleteRanges.
+        if (total_payload_bytes_written <= total_payload_bytes) {
+          *memtable_payload_bytes = total_payload_bytes;
+          *memtable_garbage_bytes =
+              total_payload_bytes - total_payload_bytes_written;
+        } else {
+          *memtable_payload_bytes = 0;
+          *memtable_garbage_bytes = 0;
+        }
+      }
       if (table_properties) {
         *table_properties = tp;
       }
-      // Add the checksum information to file metadata.
-      meta->file_checksum = builder->GetFileChecksum();
-      meta->file_checksum_func_name = builder->GetFileChecksumFuncName();
     }
     delete builder;
 
     // Finish and check for file errors
+    TEST_SYNC_POINT("BuildTable:BeforeSyncTable");
     if (s.ok() && !empty) {
-      StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
-      s = file_writer->Sync(ioptions.use_fsync);
+      StopWatch sw(ioptions.clock, ioptions.stats, TABLE_SYNC_MICROS);
+      *io_status = file_writer->Sync(ioptions.use_fsync);
     }
-    if (s.ok() && !empty) {
-      s = file_writer->Close();
+    TEST_SYNC_POINT("BuildTable:BeforeCloseTableFile");
+    if (s.ok() && io_status->ok() && !empty) {
+      *io_status = file_writer->Close();
+    }
+    if (s.ok() && io_status->ok() && !empty) {
+      // Add the checksum information to file metadata.
+      meta->file_checksum = file_writer->GetFileChecksum();
+      meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName();
+      file_checksum = meta->file_checksum;
+      file_checksum_func_name = meta->file_checksum_func_name;
+    }
+
+    if (s.ok()) {
+      s = *io_status;
+    }
+
+    if (blob_file_builder) {
+      if (s.ok()) {
+        s = blob_file_builder->Finish();
+      } else {
+        blob_file_builder->Abandon(s);
+      }
+      blob_file_builder.reset();
     }
 
+    // TODO Also check the IO status when create the Iterator.
+
     if (s.ok() && !empty) {
       // Verify that the table is usable
       // We set for_compaction to false and don't OptimizeForCompactionTableRead
@@ -222,20 +325,32 @@
       // No matter whether use_direct_io_for_flush_and_compaction is true,
       // we will regard this verification as user reads since the goal is
       // to cache it here for further user reads
+      ReadOptions read_options;
       std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
-          ReadOptions(), file_options, internal_comparator, *meta,
-          nullptr /* range_del_agg */,
-          mutable_cf_options.prefix_extractor.get(), nullptr,
+          read_options, file_options, tboptions.internal_comparator, *meta,
+          nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor,
+          nullptr,
          (internal_stats == nullptr) ? nullptr
                                      : internal_stats->GetFileReadHist(0),
          TableReaderCaller::kFlush, /*arena=*/nullptr,
-          /*skip_filter=*/false, level, /*smallest_compaction_key=*/nullptr,
-          /*largest_compaction_key*/ nullptr));
+          /*skip_filter=*/false, tboptions.level_at_creation,
+          MaxFileSizeForL0MetaPin(mutable_cf_options),
+          /*smallest_compaction_key=*/nullptr,
+          /*largest_compaction_key*/ nullptr,
+          /*allow_unprepared_value*/ false));
       s = it->status();
       if (s.ok() && paranoid_file_checks) {
+        OutputValidator file_validator(tboptions.internal_comparator,
+                                       /*enable_order_check=*/true,
+                                       /*enable_hash=*/true);
        for (it->SeekToFirst(); it->Valid(); it->Next()) {
+          // Generate a rolling 64-bit hash of the key and values
+          file_validator.Add(it->key(), it->value()).PermitUncheckedError();
        }
        s = it->status();
+        if (s.ok() && !output_validator.CompareValidator(file_validator)) {
+          s = Status::Corruption("Paranoid checksums do not match");
+        }
      }
    }
  }
 
@@ -246,16 +361,38 @@
   }
 
   if (!s.ok() || meta->fd.GetFileSize() == 0) {
-    fs->DeleteFile(fname, IOOptions(), nullptr);
+    TEST_SYNC_POINT("BuildTable:BeforeDeleteFile");
+
+    constexpr IODebugContext* dbg = nullptr;
+
+    Status ignored = fs->DeleteFile(fname, IOOptions(), dbg);
+    ignored.PermitUncheckedError();
+
+    assert(blob_file_additions || blob_file_paths.empty());
+
+    if (blob_file_additions) {
+      for (const std::string& blob_file_path : blob_file_paths) {
+        ignored = DeleteDBFile(&db_options, blob_file_path, dbname,
+                               /*force_bg=*/false, /*force_fg=*/false);
+        ignored.PermitUncheckedError();
+        TEST_SYNC_POINT("BuildTable::AfterDeleteFile");
+      }
+    }
   }
 
+  Status status_for_listener = s;
   if (meta->fd.GetFileSize() == 0) {
     fname = "(nil)";
+    if (s.ok()) {
+      status_for_listener = Status::Aborted("Empty SST file not kept");
+    }
   }
   // Output to event logger and fire events.
   EventHelpers::LogAndNotifyTableFileCreationFinished(
-      event_logger, ioptions.listeners, dbname, column_family_name, fname,
-      job_id, meta->fd, meta->oldest_blob_file_number, tp, reason, s);
+      event_logger, ioptions.listeners, dbname, tboptions.column_family_name,
+      fname, job_id, meta->fd, meta->oldest_blob_file_number, tp,
+      tboptions.reason, status_for_listener, file_checksum,
+      file_checksum_func_name);
 
   return s;
 }
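The flush path above now reports memtable efficiency: everything that entered the memtable (raw keys, raw values, range tombstone payload) versus what survived into the SST file, with the difference counted as garbage. A worked example of that arithmetic, with made-up numbers:

    // Illustrative arithmetic for the accounting added in BuildTable above.
    uint64_t total_payload_bytes = 1000000    // raw key bytes scanned
                                 + 4000000    // raw value bytes scanned
                                 + 50000;     // range tombstone payload
    uint64_t total_payload_bytes_written = 3200000;  // tp.raw_key_size +
                                                     // tp.raw_value_size
    // written <= payload here, so no underflow:
    uint64_t memtable_payload_bytes = total_payload_bytes;          // 5050000
    uint64_t memtable_garbage_bytes =
        total_payload_bytes - total_payload_bytes_written;          // 1850000
    // About 37% of the flushed payload was overwritten or deleted entries
    // that never reached the output file.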
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/builder.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/builder.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/builder.h 2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/builder.h 2025-05-19 16:14:27.000000000 +0000
@@ -24,34 +24,20 @@
 
 namespace ROCKSDB_NAMESPACE {
 
-struct Options;
 struct FileMetaData;
 
-class Env;
-struct EnvOptions;
-class Iterator;
+class VersionSet;
+class BlobFileAddition;
 class SnapshotChecker;
 class TableCache;
-class VersionEdit;
 class TableBuilder;
 class WritableFileWriter;
 class InternalStats;
+class BlobFileCompletionCallback;
 
-// @param column_family_name Name of the column family that is also identified
-// by column_family_id, or empty string if unknown. It must outlive the
-// TableBuilder returned by this function.
-TableBuilder* NewTableBuilder(
-    const ImmutableCFOptions& options, const MutableCFOptions& moptions,
-    const InternalKeyComparator& internal_comparator,
-    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
-        int_tbl_prop_collector_factories,
-    uint32_t column_family_id, const std::string& column_family_name,
-    WritableFileWriter* file, const CompressionType compression_type,
-    const uint64_t sample_for_compression,
-    const CompressionOptions& compression_opts, int level,
-    const bool skip_filters = false, const uint64_t creation_time = 0,
-    const uint64_t oldest_key_time = 0, const uint64_t target_file_size = 0,
-    const uint64_t file_creation_time = 0);
+// Convenience function for NewTableBuilder on the embedded table_factory.
+TableBuilder* NewTableBuilder(const TableBuilderOptions& tboptions,
+                              WritableFileWriter* file);
 
 // Build a Table file from the contents of *iter. The generated file
 // will be named according to number specified in meta. On success, the rest of
 // *meta will be filled with metadata about the generated table.
@@ -62,27 +48,27 @@
 // @param column_family_name Name of the column family that is also identified
 // by column_family_id, or empty string if unknown.
 extern Status BuildTable(
-    const std::string& dbname, Env* env, FileSystem* fs,
-    const ImmutableCFOptions& options,
-    const MutableCFOptions& mutable_cf_options, const FileOptions& file_options,
-    TableCache* table_cache, InternalIterator* iter,
+    const std::string& dbname, VersionSet* versions,
+    const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions,
+    const FileOptions& file_options, TableCache* table_cache,
+    InternalIterator* iter,
     std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
         range_del_iters,
-    FileMetaData* meta, const InternalKeyComparator& internal_comparator,
-    const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
-        int_tbl_prop_collector_factories,
-    uint32_t column_family_id, const std::string& column_family_name,
+    FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
     std::vector<SequenceNumber> snapshots,
     SequenceNumber earliest_write_conflict_snapshot,
-    SnapshotChecker* snapshot_checker, const CompressionType compression,
-    const uint64_t sample_for_compression,
-    const CompressionOptions& compression_opts, bool paranoid_file_checks,
-    InternalStats* internal_stats, TableFileCreationReason reason,
+    SnapshotChecker* snapshot_checker, bool paranoid_file_checks,
+    InternalStats* internal_stats, IOStatus* io_status,
+    const std::shared_ptr<IOTracer>& io_tracer,
+    BlobFileCreationReason blob_creation_reason,
     EventLogger* event_logger = nullptr, int job_id = 0,
     const Env::IOPriority io_priority = Env::IO_HIGH,
-    TableProperties* table_properties = nullptr, int level = -1,
-    const uint64_t creation_time = 0, const uint64_t oldest_key_time = 0,
+    TableProperties* table_properties = nullptr,
     Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
-    const uint64_t file_creation_time = 0);
+    const std::string* full_history_ts_low = nullptr,
+    BlobFileCompletionCallback* blob_callback = nullptr,
+    uint64_t* num_input_entries = nullptr,
+    uint64_t* memtable_payload_bytes = nullptr,
+    uint64_t* memtable_garbage_bytes = nullptr);
 
 }  // namespace ROCKSDB_NAMESPACE
#include "rocksdb/merge_operator.h" #include "rocksdb/options.h" +#include "rocksdb/perf_context.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" @@ -35,17 +40,13 @@ #include "rocksdb/utilities/db_ttl.h" #include "rocksdb/utilities/memory_util.h" #include "rocksdb/utilities/optimistic_transaction_db.h" +#include "rocksdb/utilities/table_properties_collectors.h" #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" #include "rocksdb/write_batch.h" -#include "rocksdb/perf_context.h" #include "utilities/merge_operators.h" -#include -#include -#include - using ROCKSDB_NAMESPACE::BackupableDBOptions; using ROCKSDB_NAMESPACE::BackupEngine; using ROCKSDB_NAMESPACE::BackupID; @@ -60,7 +61,6 @@ using ROCKSDB_NAMESPACE::ColumnFamilyHandle; using ROCKSDB_NAMESPACE::ColumnFamilyOptions; using ROCKSDB_NAMESPACE::CompactionFilter; -using ROCKSDB_NAMESPACE::CompactionFilterContext; using ROCKSDB_NAMESPACE::CompactionFilterFactory; using ROCKSDB_NAMESPACE::CompactionOptionsFIFO; using ROCKSDB_NAMESPACE::CompactRangeOptions; @@ -80,12 +80,15 @@ using ROCKSDB_NAMESPACE::Iterator; using ROCKSDB_NAMESPACE::LiveFileMetaData; using ROCKSDB_NAMESPACE::Logger; +using ROCKSDB_NAMESPACE::LRUCacheOptions; +using ROCKSDB_NAMESPACE::MemoryAllocator; using ROCKSDB_NAMESPACE::MemoryUtil; using ROCKSDB_NAMESPACE::MergeOperator; -using ROCKSDB_NAMESPACE::MergeOperators; using ROCKSDB_NAMESPACE::NewBloomFilterPolicy; +using ROCKSDB_NAMESPACE::NewCompactOnDeletionCollectorFactory; using ROCKSDB_NAMESPACE::NewGenericRateLimiter; using ROCKSDB_NAMESPACE::NewLRUCache; +using ROCKSDB_NAMESPACE::NewRibbonFilterPolicy; using ROCKSDB_NAMESPACE::OptimisticTransactionDB; using ROCKSDB_NAMESPACE::OptimisticTransactionOptions; using ROCKSDB_NAMESPACE::Options; @@ -104,6 +107,7 @@ using ROCKSDB_NAMESPACE::Snapshot; using ROCKSDB_NAMESPACE::SstFileWriter; using ROCKSDB_NAMESPACE::Status; +using ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory; using ROCKSDB_NAMESPACE::Transaction; using ROCKSDB_NAMESPACE::TransactionDB; using ROCKSDB_NAMESPACE::TransactionDBOptions; @@ -115,10 +119,8 @@ using ROCKSDB_NAMESPACE::WriteBatchWithIndex; using ROCKSDB_NAMESPACE::WriteOptions; -using std::shared_ptr; using std::vector; using std::unordered_set; -using std::map; extern "C" { @@ -154,6 +156,12 @@ struct rocksdb_logger_t { std::shared_ptr rep; }; +struct rocksdb_lru_cache_options_t { + LRUCacheOptions rep; +}; +struct rocksdb_memory_allocator_t { + std::shared_ptr rep; +}; struct rocksdb_cache_t { std::shared_ptr rep; }; @@ -181,6 +189,9 @@ struct rocksdb_transaction_t { Transaction* rep; }; +struct rocksdb_backupable_db_options_t { + BackupableDBOptions rep; +}; struct rocksdb_checkpoint_t { Checkpoint* rep; }; @@ -504,13 +515,13 @@ return result; } -rocksdb_t* rocksdb_open_for_read_only( - const rocksdb_options_t* options, - const char* name, - unsigned char error_if_log_file_exist, - char** errptr) { +rocksdb_t* rocksdb_open_for_read_only(const rocksdb_options_t* options, + const char* name, + unsigned char error_if_wal_file_exists, + char** errptr) { DB* db; - if (SaveError(errptr, DB::OpenForReadOnly(options->rep, std::string(name), &db, error_if_log_file_exist))) { + if (SaveError(errptr, DB::OpenForReadOnly(options->rep, std::string(name), + &db, error_if_wal_file_exists))) { return nullptr; } rocksdb_t* result = new rocksdb_t; @@ -549,6 +560,18 @@ return result; } +rocksdb_backup_engine_t* 
rocksdb_backup_engine_open_opts( + const rocksdb_backupable_db_options_t* options, rocksdb_env_t* env, + char** errptr) { + BackupEngine* be; + if (SaveError(errptr, BackupEngine::Open(options->rep, env->rep, &be))) { + return nullptr; + } + rocksdb_backup_engine_t* result = new rocksdb_backup_engine_t; + result->rep = be; + return result; +} + void rocksdb_backup_engine_create_new_backup(rocksdb_backup_engine_t* be, rocksdb_t* db, char** errptr) { @@ -595,6 +618,15 @@ restore_options->rep)); } +void rocksdb_backup_engine_restore_db_from_backup( + rocksdb_backup_engine_t* be, const char* db_dir, const char* wal_dir, + const rocksdb_restore_options_t* restore_options, const uint32_t backup_id, + char** errptr) { + SaveError(errptr, be->rep->RestoreDBFromBackup(backup_id, std::string(db_dir), + std::string(wal_dir), + restore_options->rep)); +} + const rocksdb_backup_engine_info_t* rocksdb_backup_engine_get_backup_info( rocksdb_backup_engine_t* be) { rocksdb_backup_engine_info_t* result = new rocksdb_backup_engine_info_t; @@ -636,6 +668,128 @@ delete be; } +rocksdb_backupable_db_options_t* rocksdb_backupable_db_options_create( + const char* backup_dir) { + return new rocksdb_backupable_db_options_t{ + BackupableDBOptions(std::string(backup_dir))}; +} + +void rocksdb_backupable_db_options_set_backup_dir( + rocksdb_backupable_db_options_t* options, const char* backup_dir) { + options->rep.backup_dir = std::string(backup_dir); +} + +void rocksdb_backupable_db_options_set_env( + rocksdb_backupable_db_options_t* options, rocksdb_env_t* env) { + options->rep.backup_env = (env ? env->rep : nullptr); +} + +void rocksdb_backupable_db_options_set_share_table_files( + rocksdb_backupable_db_options_t* options, unsigned char val) { + options->rep.share_table_files = val; +} + +unsigned char rocksdb_backupable_db_options_get_share_table_files( + rocksdb_backupable_db_options_t* options) { + return options->rep.share_table_files; +} + +void rocksdb_backupable_db_options_set_sync( + rocksdb_backupable_db_options_t* options, unsigned char val) { + options->rep.sync = val; +} + +unsigned char rocksdb_backupable_db_options_get_sync( + rocksdb_backupable_db_options_t* options) { + return options->rep.sync; +} + +void rocksdb_backupable_db_options_set_destroy_old_data( + rocksdb_backupable_db_options_t* options, unsigned char val) { + options->rep.destroy_old_data = val; +} + +unsigned char rocksdb_backupable_db_options_get_destroy_old_data( + rocksdb_backupable_db_options_t* options) { + return options->rep.destroy_old_data; +} + +void rocksdb_backupable_db_options_set_backup_log_files( + rocksdb_backupable_db_options_t* options, unsigned char val) { + options->rep.backup_log_files = val; +} + +unsigned char rocksdb_backupable_db_options_get_backup_log_files( + rocksdb_backupable_db_options_t* options) { + return options->rep.backup_log_files; +} + +void rocksdb_backupable_db_options_set_backup_rate_limit( + rocksdb_backupable_db_options_t* options, uint64_t limit) { + options->rep.backup_rate_limit = limit; +} + +uint64_t rocksdb_backupable_db_options_get_backup_rate_limit( + rocksdb_backupable_db_options_t* options) { + return options->rep.backup_rate_limit; +} + +void rocksdb_backupable_db_options_set_restore_rate_limit( + rocksdb_backupable_db_options_t* options, uint64_t limit) { + options->rep.restore_rate_limit = limit; +} + +uint64_t rocksdb_backupable_db_options_get_restore_rate_limit( + rocksdb_backupable_db_options_t* options) { + return options->rep.restore_rate_limit; +} + +void 
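The block above exposes BackupableDBOptions through the C API. A minimal sketch of how the pieces compose with rocksdb_backup_engine_open_opts, assuming an already-open rocksdb_t* handle; the backup directory and tuning values are illustrative:

    #include "rocksdb/c.h"

    void backup_once(rocksdb_t* db, rocksdb_env_t* env) {
      char* err = nullptr;
      rocksdb_backupable_db_options_t* bopts =
          rocksdb_backupable_db_options_create("/tmp/rocksdb_backups");
      rocksdb_backupable_db_options_set_sync(bopts, 1);
      rocksdb_backupable_db_options_set_max_background_operations(bopts, 2);
      rocksdb_backup_engine_t* be =
          rocksdb_backup_engine_open_opts(bopts, env, &err);
      if (err == nullptr) {
        rocksdb_backup_engine_create_new_backup(be, db, &err);
        rocksdb_backup_engine_close(be);
      }
      rocksdb_backupable_db_options_destroy(bopts);
      // In real code, check and free err after each call that can set it.
    }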
 rocksdb_checkpoint_t* rocksdb_checkpoint_object_create(rocksdb_t* db,
                                                        char** errptr) {
   Checkpoint* checkpoint;
@@ -698,12 +852,47 @@
   return result;
 }
 
+rocksdb_t* rocksdb_open_column_families_with_ttl(
+    const rocksdb_options_t* db_options, const char* name,
+    int num_column_families, const char* const* column_family_names,
+    const rocksdb_options_t* const* column_family_options,
+    rocksdb_column_family_handle_t** column_family_handles, const int* ttls,
+    char** errptr) {
+  std::vector<int32_t> ttls_vec;
+  std::vector<ColumnFamilyDescriptor> column_families;
+  for (int i = 0; i < num_column_families; i++) {
+    ttls_vec.push_back(ttls[i]);
+
+    column_families.push_back(ColumnFamilyDescriptor(
+        std::string(column_family_names[i]),
+        ColumnFamilyOptions(column_family_options[i]->rep)));
+  }
+
+  ROCKSDB_NAMESPACE::DBWithTTL* db;
+  std::vector<ColumnFamilyHandle*> handles;
+  if (SaveError(errptr, ROCKSDB_NAMESPACE::DBWithTTL::Open(
+                            DBOptions(db_options->rep), std::string(name),
+                            column_families, &handles, &db, ttls_vec))) {
+    return nullptr;
+  }
+
+  for (size_t i = 0; i < handles.size(); i++) {
+    rocksdb_column_family_handle_t* c_handle =
+        new rocksdb_column_family_handle_t;
+    c_handle->rep = handles[i];
+    column_family_handles[i] = c_handle;
+  }
+  rocksdb_t* result = new rocksdb_t;
+  result->rep = db;
+  return result;
+}
+
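The TTL-aware open added above mirrors rocksdb_open_column_families but takes one TTL (in seconds) per column family and routes through DBWithTTL::Open. A sketch of a call site; the path, names, and TTL values are illustrative assumptions:

    // Open two column families, the second with a one-hour TTL (illustrative).
    #include <stddef.h>
    #include "rocksdb/c.h"

    rocksdb_t* open_with_ttls(const rocksdb_options_t* db_options,
                              const rocksdb_options_t* cf_options,
                              rocksdb_column_family_handle_t** handles,
                              char** errptr) {
      const char* cf_names[2] = {"default", "session_cache"};
      const rocksdb_options_t* cf_opts[2] = {cf_options, cf_options};
      const int ttls[2] = {0 /* no expiry */, 3600 /* seconds */};
      return rocksdb_open_column_families_with_ttl(
          db_options, "/tmp/ttl_db", 2, cf_names, cf_opts, handles, ttls,
          errptr);
    }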
 rocksdb_t* rocksdb_open_for_read_only_column_families(
     const rocksdb_options_t* db_options, const char* name,
     int num_column_families, const char* const* column_family_names,
     const rocksdb_options_t* const* column_family_options,
     rocksdb_column_family_handle_t** column_family_handles,
-    unsigned char error_if_log_file_exist, char** errptr) {
+    unsigned char error_if_wal_file_exists, char** errptr) {
   std::vector<ColumnFamilyDescriptor> column_families;
   for (int i = 0; i < num_column_families; i++) {
     column_families.push_back(ColumnFamilyDescriptor(
@@ -713,8 +902,10 @@
 
   DB* db;
   std::vector<ColumnFamilyHandle*> handles;
-  if (SaveError(errptr, DB::OpenForReadOnly(DBOptions(db_options->rep),
-          std::string(name), column_families, &handles, &db, error_if_log_file_exist))) {
+  if (SaveError(errptr,
+                DB::OpenForReadOnly(DBOptions(db_options->rep),
+                                    std::string(name), column_families,
+                                    &handles, &db, error_if_wal_file_exists))) {
     return nullptr;
   }
 
@@ -796,6 +987,18 @@
   return handle;
 }
 
+rocksdb_column_family_handle_t* rocksdb_create_column_family_with_ttl(
+    rocksdb_t* db, const rocksdb_options_t* column_family_options,
+    const char* column_family_name, int ttl, char** errptr) {
+  ROCKSDB_NAMESPACE::DBWithTTL* db_with_ttl =
+      static_cast<ROCKSDB_NAMESPACE::DBWithTTL*>(db->rep);
+  rocksdb_column_family_handle_t* handle = new rocksdb_column_family_handle_t;
+  SaveError(errptr, db_with_ttl->CreateColumnFamilyWithTtl(
+                        ColumnFamilyOptions(column_family_options->rep),
+                        std::string(column_family_name), &(handle->rep), ttl));
+  return handle;
+}
+
 void rocksdb_drop_column_family(
     rocksdb_t* db,
     rocksdb_column_family_handle_t* handle,
@@ -996,6 +1199,55 @@
   }
 }
 
+unsigned char rocksdb_key_may_exist(rocksdb_t* db,
+                                    const rocksdb_readoptions_t* options,
+                                    const char* key, size_t key_len,
+                                    char** value, size_t* val_len,
+                                    const char* timestamp, size_t timestamp_len,
+                                    unsigned char* value_found) {
+  std::string tmp;
+  std::string time;
+  if (timestamp) {
+    time.assign(timestamp, timestamp_len);
+  }
+  bool found = false;
+  const bool result = db->rep->KeyMayExist(options->rep, Slice(key, key_len),
+                                           &tmp, timestamp ? &time : nullptr,
+                                           value_found ? &found : nullptr);
+  if (value_found) {
+    *value_found = found;
+    if (found) {
+      *val_len = tmp.size();
+      *value = CopyString(tmp);
+    }
+  }
+  return result;
+}
+
+unsigned char rocksdb_key_may_exist_cf(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, const char* key,
+    size_t key_len, char** value, size_t* val_len, const char* timestamp,
+    size_t timestamp_len, unsigned char* value_found) {
+  std::string tmp;
+  std::string time;
+  if (timestamp) {
+    time.assign(timestamp, timestamp_len);
+  }
+  bool found = false;
+  const bool result = db->rep->KeyMayExist(
+      options->rep, column_family->rep, Slice(key, key_len), &tmp,
+      timestamp ? &time : nullptr, value_found ? &found : nullptr);
+  if (value_found) {
+    *value_found = found;
+    if (found) {
+      *val_len = tmp.size();
+      *value = CopyString(tmp);
+    }
+  }
+  return result;
+}
+
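rocksdb_key_may_exist wraps DB::KeyMayExist: a negative answer is definitive, a positive one may be a false positive, and when the value happens to be available in memory it is copied out through *value. A sketch of a caller, assuming an open db and read options; the caller owns, and must free, the copied buffer:

    #include <cstdlib>
    #include "rocksdb/c.h"

    // Returns 0 only when the key is definitely absent (illustrative helper).
    unsigned char maybe_has_key(rocksdb_t* db, rocksdb_readoptions_t* ropts,
                                const char* key, size_t klen) {
      char* value = nullptr;
      size_t val_len = 0;
      unsigned char value_found = 0;
      unsigned char may_exist =
          rocksdb_key_may_exist(db, ropts, key, klen, &value, &val_len,
                                nullptr /* timestamp */, 0, &value_found);
      if (value_found) {
        std::free(value);  // the binding hands back a malloc'd copy
      }
      return may_exist;
    }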
&found : nullptr); + if (value_found) { + *value_found = found; + if (found) { + *val_len = tmp.size(); + *value = CopyString(tmp); + } + } + return result; +} + rocksdb_iterator_t* rocksdb_create_iterator( rocksdb_t* db, const rocksdb_readoptions_t* options) { @@ -1148,34 +1400,39 @@ } } -void rocksdb_approximate_sizes( - rocksdb_t* db, - int num_ranges, - const char* const* range_start_key, const size_t* range_start_key_len, - const char* const* range_limit_key, const size_t* range_limit_key_len, - uint64_t* sizes) { +void rocksdb_approximate_sizes(rocksdb_t* db, int num_ranges, + const char* const* range_start_key, + const size_t* range_start_key_len, + const char* const* range_limit_key, + const size_t* range_limit_key_len, + uint64_t* sizes, char** errptr) { Range* ranges = new Range[num_ranges]; for (int i = 0; i < num_ranges; i++) { ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); } - db->rep->GetApproximateSizes(ranges, num_ranges, sizes); + Status s = db->rep->GetApproximateSizes(ranges, num_ranges, sizes); + if (!s.ok()) { + SaveError(errptr, s); + } delete[] ranges; } void rocksdb_approximate_sizes_cf( - rocksdb_t* db, - rocksdb_column_family_handle_t* column_family, - int num_ranges, - const char* const* range_start_key, const size_t* range_start_key_len, - const char* const* range_limit_key, const size_t* range_limit_key_len, - uint64_t* sizes) { + rocksdb_t* db, rocksdb_column_family_handle_t* column_family, + int num_ranges, const char* const* range_start_key, + const size_t* range_start_key_len, const char* const* range_limit_key, + const size_t* range_limit_key_len, uint64_t* sizes, char** errptr) { Range* ranges = new Range[num_ranges]; for (int i = 0; i < num_ranges; i++) { ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); } - db->rep->GetApproximateSizes(column_family->rep, ranges, num_ranges, sizes); + Status s = db->rep->GetApproximateSizes(column_family->rep, ranges, + num_ranges, sizes); + if (!s.ok()) { + SaveError(errptr, s); + } delete[] ranges; } @@ -1256,6 +1513,10 @@ SaveError(errptr, db->rep->Flush(options->rep, column_family->rep)); } +void rocksdb_flush_wal(rocksdb_t* db, unsigned char sync, char** errptr) { + SaveError(errptr, db->rep->FlushWAL(sync)); +} + void rocksdb_disable_file_deletions( rocksdb_t* db, char** errptr) { @@ -1466,6 +1727,11 @@ b->rep.Delete(Slice(key, klen)); } +void rocksdb_writebatch_singledelete(rocksdb_writebatch_t* b, const char* key, + size_t klen) { + b->rep.SingleDelete(Slice(key, klen)); +} + void rocksdb_writebatch_delete_cf( rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, @@ -1473,6 +1739,12 @@ b->rep.Delete(column_family->rep, Slice(key, klen)); } +void rocksdb_writebatch_singledelete_cf( + rocksdb_writebatch_t* b, rocksdb_column_family_handle_t* column_family, + const char* key, size_t klen) { + b->rep.SingleDelete(column_family->rep, Slice(key, klen)); +} + void rocksdb_writebatch_deletev( rocksdb_writebatch_t* b, int num_keys, const char* const* keys_list, @@ -1723,6 +1995,11 @@ b->rep->Delete(Slice(key, klen)); } +void rocksdb_writebatch_wi_singledelete(rocksdb_writebatch_wi_t* b, + const char* key, size_t klen) { + b->rep->SingleDelete(Slice(key, klen)); +} + void rocksdb_writebatch_wi_delete_cf( rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family, @@ -1730,6 +2007,12 @@ 
   b->rep->Delete(column_family->rep, Slice(key, klen));
 }
 
+void rocksdb_writebatch_wi_singledelete_cf(
+    rocksdb_writebatch_wi_t* b, rocksdb_column_family_handle_t* column_family,
+    const char* key, size_t klen) {
+  b->rep->SingleDelete(column_family->rep, Slice(key, klen));
+}
+
 void rocksdb_writebatch_wi_deletev(
     rocksdb_writebatch_wi_t* b,
     int num_keys, const char* const* keys_list,
@@ -2154,6 +2437,10 @@
   delete options;
 }
 
+rocksdb_options_t* rocksdb_options_create_copy(rocksdb_options_t* options) {
+  return new rocksdb_options_t(*options);
+}
+
 void rocksdb_options_increase_parallelism(
     rocksdb_options_t* opt, int total_threads) {
   opt->rep.IncreaseParallelism(total_threads);
@@ -2179,6 +2466,10 @@
   opt->rep.allow_ingest_behind = v;
 }
 
+unsigned char rocksdb_options_get_allow_ingest_behind(rocksdb_options_t* opt) {
+  return opt->rep.allow_ingest_behind;
+}
+
 void rocksdb_options_set_compaction_filter(
     rocksdb_options_t* opt,
     rocksdb_compactionfilter_t* filter) {
@@ -2196,6 +2487,10 @@
   opt->rep.compaction_readahead_size = s;
 }
 
+size_t rocksdb_options_get_compaction_readahead_size(rocksdb_options_t* opt) {
+  return opt->rep.compaction_readahead_size;
+}
+
 void rocksdb_options_set_comparator(
     rocksdb_options_t* opt,
     rocksdb_comparator_t* cmp) {
@@ -2208,27 +2503,43 @@
   opt->rep.merge_operator = std::shared_ptr<MergeOperator>(merge_operator);
 }
 
-
 void rocksdb_options_set_create_if_missing(
     rocksdb_options_t* opt, unsigned char v) {
   opt->rep.create_if_missing = v;
 }
 
+unsigned char rocksdb_options_get_create_if_missing(rocksdb_options_t* opt) {
+  return opt->rep.create_if_missing;
+}
+
 void rocksdb_options_set_create_missing_column_families(
     rocksdb_options_t* opt, unsigned char v) {
   opt->rep.create_missing_column_families = v;
 }
 
+unsigned char rocksdb_options_get_create_missing_column_families(
+    rocksdb_options_t* opt) {
+  return opt->rep.create_missing_column_families;
+}
+
 void rocksdb_options_set_error_if_exists(
     rocksdb_options_t* opt, unsigned char v) {
   opt->rep.error_if_exists = v;
 }
 
+unsigned char rocksdb_options_get_error_if_exists(rocksdb_options_t* opt) {
+  return opt->rep.error_if_exists;
+}
+
 void rocksdb_options_set_paranoid_checks(
     rocksdb_options_t* opt, unsigned char v) {
   opt->rep.paranoid_checks = v;
 }
 
+unsigned char rocksdb_options_get_paranoid_checks(rocksdb_options_t* opt) {
+  return opt->rep.paranoid_checks;
+}
+
 void rocksdb_options_set_db_paths(rocksdb_options_t* opt,
                                   const rocksdb_dbpath_t** dbpath_values,
                                   size_t num_paths) {
@@ -2254,57 +2565,107 @@
   opt->rep.info_log_level = static_cast<InfoLogLevel>(v);
 }
 
+int rocksdb_options_get_info_log_level(rocksdb_options_t* opt) {
+  return static_cast<int>(opt->rep.info_log_level);
+}
+
 void rocksdb_options_set_db_write_buffer_size(rocksdb_options_t* opt,
                                               size_t s) {
   opt->rep.db_write_buffer_size = s;
 }
 
+size_t rocksdb_options_get_db_write_buffer_size(rocksdb_options_t* opt) {
+  return opt->rep.db_write_buffer_size;
+}
+
 void rocksdb_options_set_write_buffer_size(rocksdb_options_t* opt, size_t s) {
   opt->rep.write_buffer_size = s;
 }
 
+size_t rocksdb_options_get_write_buffer_size(rocksdb_options_t* opt) {
+  return opt->rep.write_buffer_size;
+}
+
 void rocksdb_options_set_max_open_files(rocksdb_options_t* opt, int n) {
   opt->rep.max_open_files = n;
 }
 
+int rocksdb_options_get_max_open_files(rocksdb_options_t* opt) {
+  return opt->rep.max_open_files;
+}
+
 void rocksdb_options_set_max_file_opening_threads(rocksdb_options_t* opt,
                                                   int n) {
   opt->rep.max_file_opening_threads = n;
 }
 
+int rocksdb_options_get_max_file_opening_threads(rocksdb_options_t* opt) {
+  return opt->rep.max_file_opening_threads;
+}
+
 void rocksdb_options_set_max_total_wal_size(rocksdb_options_t* opt,
                                             uint64_t n) {
   opt->rep.max_total_wal_size = n;
 }
 
+uint64_t rocksdb_options_get_max_total_wal_size(rocksdb_options_t* opt) {
+  return opt->rep.max_total_wal_size;
+}
+
 void rocksdb_options_set_target_file_size_base(
     rocksdb_options_t* opt, uint64_t n) {
   opt->rep.target_file_size_base = n;
 }
 
+uint64_t rocksdb_options_get_target_file_size_base(rocksdb_options_t* opt) {
+  return opt->rep.target_file_size_base;
+}
+
 void rocksdb_options_set_target_file_size_multiplier(
     rocksdb_options_t* opt, int n) {
   opt->rep.target_file_size_multiplier = n;
 }
 
+int rocksdb_options_get_target_file_size_multiplier(rocksdb_options_t* opt) {
+  return opt->rep.target_file_size_multiplier;
+}
+
 void rocksdb_options_set_max_bytes_for_level_base(
     rocksdb_options_t* opt, uint64_t n) {
   opt->rep.max_bytes_for_level_base = n;
 }
 
+uint64_t rocksdb_options_get_max_bytes_for_level_base(rocksdb_options_t* opt) {
+  return opt->rep.max_bytes_for_level_base;
+}
+
 void rocksdb_options_set_level_compaction_dynamic_level_bytes(
     rocksdb_options_t* opt, unsigned char v) {
   opt->rep.level_compaction_dynamic_level_bytes = v;
 }
 
+unsigned char rocksdb_options_get_level_compaction_dynamic_level_bytes(
+    rocksdb_options_t* opt) {
+  return opt->rep.level_compaction_dynamic_level_bytes;
+}
+
 void rocksdb_options_set_max_bytes_for_level_multiplier(rocksdb_options_t* opt,
                                                         double n) {
   opt->rep.max_bytes_for_level_multiplier = n;
 }
 
+double rocksdb_options_get_max_bytes_for_level_multiplier(
+    rocksdb_options_t* opt) {
+  return opt->rep.max_bytes_for_level_multiplier;
+}
+
 void rocksdb_options_set_max_compaction_bytes(rocksdb_options_t* opt,
                                               uint64_t n) {
   opt->rep.max_compaction_bytes = n;
 }
 
+uint64_t rocksdb_options_get_max_compaction_bytes(rocksdb_options_t* opt) {
+  return opt->rep.max_compaction_bytes;
+}
+
 void rocksdb_options_set_max_bytes_for_level_multiplier_additional(
     rocksdb_options_t* opt, int* level_values, size_t num_levels) {
   opt->rep.max_bytes_for_level_multiplier_additional.resize(num_levels);
@@ -2322,30 +2683,129 @@
   opt->rep.skip_stats_update_on_db_open = val;
 }
 
+unsigned char rocksdb_options_get_skip_stats_update_on_db_open(
+    rocksdb_options_t* opt) {
+  return opt->rep.skip_stats_update_on_db_open;
+}
+
 void rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(
     rocksdb_options_t* opt, unsigned char val) {
   opt->rep.skip_checking_sst_file_sizes_on_db_open = val;
 }
 
+unsigned char rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(
+    rocksdb_options_t* opt) {
+  return opt->rep.skip_checking_sst_file_sizes_on_db_open;
+}
+
+/* Blob Options Settings */
+void rocksdb_options_set_enable_blob_files(rocksdb_options_t* opt,
+                                           unsigned char val) {
+  opt->rep.enable_blob_files = val;
+}
+
+extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_enable_blob_files(
+    rocksdb_options_t* opt) {
+  return opt->rep.enable_blob_files;
+}
+
+void rocksdb_options_set_min_blob_size(rocksdb_options_t* opt, uint64_t val) {
+  opt->rep.min_blob_size = val;
+}
+
+uint64_t rocksdb_options_get_min_blob_size(rocksdb_options_t* opt) {
+  return opt->rep.min_blob_size;
+}
+
+void rocksdb_options_set_blob_file_size(rocksdb_options_t* opt, uint64_t val) {
+  opt->rep.blob_file_size = val;
+}
+
+uint64_t rocksdb_options_get_blob_file_size(rocksdb_options_t* opt) {
+  return opt->rep.blob_file_size;
+}
+
+void rocksdb_options_set_blob_compression_type(rocksdb_options_t* opt,
+                                               int val) {
+  opt->rep.blob_compression_type = static_cast<CompressionType>(val);
+}
+
+int rocksdb_options_get_blob_compression_type(rocksdb_options_t* opt) {
+  return opt->rep.blob_compression_type;
+}
+
+void rocksdb_options_set_enable_blob_gc(rocksdb_options_t* opt,
+                                        unsigned char val) {
+  opt->rep.enable_blob_garbage_collection = val;
+}
+
+unsigned char rocksdb_options_get_enable_blob_gc(rocksdb_options_t* opt) {
+  return opt->rep.enable_blob_garbage_collection;
+}
+
+void rocksdb_options_set_blob_gc_age_cutoff(rocksdb_options_t* opt,
+                                            double val) {
+  opt->rep.blob_garbage_collection_age_cutoff = val;
+}
+
+double rocksdb_options_get_blob_gc_age_cutoff(rocksdb_options_t* opt) {
+  return opt->rep.blob_garbage_collection_age_cutoff;
+}
+
+void rocksdb_options_set_blob_gc_force_threshold(rocksdb_options_t* opt,
+                                                 double val) {
+  opt->rep.blob_garbage_collection_force_threshold = val;
+}
+
+double rocksdb_options_get_blob_gc_force_threshold(rocksdb_options_t* opt) {
+  return opt->rep.blob_garbage_collection_force_threshold;
+}
+
+void rocksdb_options_set_blob_compaction_readahead_size(rocksdb_options_t* opt,
+                                                        uint64_t val) {
+  opt->rep.blob_compaction_readahead_size = val;
+}
+
+uint64_t rocksdb_options_get_blob_compaction_readahead_size(
+    rocksdb_options_t* opt) {
+  return opt->rep.blob_compaction_readahead_size;
+}
+
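The setters above bring the integrated BlobDB options to the C API as well. A short sketch of turning them on; the thresholds are illustrative:

    #include "rocksdb/c.h"

    void enable_blob_storage(rocksdb_options_t* opts) {
      rocksdb_options_set_enable_blob_files(opts, 1);
      rocksdb_options_set_min_blob_size(opts, 4096);        // bytes
      rocksdb_options_set_blob_file_size(opts, 268435456);  // 256 MiB
      rocksdb_options_set_enable_blob_gc(opts, 1);
      rocksdb_options_set_blob_gc_age_cutoff(opts, 0.25);   // oldest quarter
    }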
+ +int rocksdb_options_get_blob_compression_type(rocksdb_options_t* opt) { + return opt->rep.blob_compression_type; +} + +void rocksdb_options_set_enable_blob_gc(rocksdb_options_t* opt, + unsigned char val) { + opt->rep.enable_blob_garbage_collection = val; +} + +unsigned char rocksdb_options_get_enable_blob_gc(rocksdb_options_t* opt) { + return opt->rep.enable_blob_garbage_collection; +} + +void rocksdb_options_set_blob_gc_age_cutoff(rocksdb_options_t* opt, + double val) { + opt->rep.blob_garbage_collection_age_cutoff = val; +} + +double rocksdb_options_get_blob_gc_age_cutoff(rocksdb_options_t* opt) { + return opt->rep.blob_garbage_collection_age_cutoff; +} + +void rocksdb_options_set_blob_gc_force_threshold(rocksdb_options_t* opt, + double val) { + opt->rep.blob_garbage_collection_force_threshold = val; +} + +double rocksdb_options_get_blob_gc_force_threshold(rocksdb_options_t* opt) { + return opt->rep.blob_garbage_collection_force_threshold; +} + +void rocksdb_options_set_blob_compaction_readahead_size(rocksdb_options_t* opt, + uint64_t val) { + opt->rep.blob_compaction_readahead_size = val; +} + +uint64_t rocksdb_options_get_blob_compaction_readahead_size( + rocksdb_options_t* opt) { + return opt->rep.blob_compaction_readahead_size; +} + void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) { opt->rep.num_levels = n; } +int rocksdb_options_get_num_levels(rocksdb_options_t* opt) { + return opt->rep.num_levels; +} + void rocksdb_options_set_level0_file_num_compaction_trigger( rocksdb_options_t* opt, int n) { opt->rep.level0_file_num_compaction_trigger = n; } +int rocksdb_options_get_level0_file_num_compaction_trigger( + rocksdb_options_t* opt) { + return opt->rep.level0_file_num_compaction_trigger; +} + void rocksdb_options_set_level0_slowdown_writes_trigger( rocksdb_options_t* opt, int n) { opt->rep.level0_slowdown_writes_trigger = n; } +int rocksdb_options_get_level0_slowdown_writes_trigger(rocksdb_options_t* opt) { + return opt->rep.level0_slowdown_writes_trigger; +} + void rocksdb_options_set_level0_stop_writes_trigger( rocksdb_options_t* opt, int n) { opt->rep.level0_stop_writes_trigger = n; } +int rocksdb_options_get_level0_stop_writes_trigger(rocksdb_options_t* opt) { + return opt->rep.level0_stop_writes_trigger; +} + void rocksdb_options_set_max_mem_compaction_level(rocksdb_options_t* /*opt*/, int /*n*/) {} @@ -2353,12 +2813,28 @@ opt->rep.wal_recovery_mode = static_cast<WALRecoveryMode>(mode); } +int rocksdb_options_get_wal_recovery_mode(rocksdb_options_t* opt) { + return static_cast<int>(opt->rep.wal_recovery_mode); +} + void rocksdb_options_set_compression(rocksdb_options_t* opt, int t) { opt->rep.compression = static_cast<CompressionType>(t); } +int rocksdb_options_get_compression(rocksdb_options_t* opt) { + return opt->rep.compression; +} + +void rocksdb_options_set_bottommost_compression(rocksdb_options_t* opt, int t) { + opt->rep.bottommost_compression = static_cast<CompressionType>(t); +} + +int rocksdb_options_get_bottommost_compression(rocksdb_options_t* opt) { + return opt->rep.bottommost_compression; +} + void rocksdb_options_set_compression_per_level(rocksdb_options_t* opt, - int* level_values, + const int* level_values, size_t num_levels) { opt->rep.compression_per_level.resize(num_levels); for (size_t i = 0; i < num_levels; ++i) { @@ -2371,7 +2847,7 @@ int w_bits, int level, int strategy, int max_dict_bytes, - bool enabled) { + unsigned char enabled) { opt->rep.bottommost_compression_opts.window_bits = w_bits; opt->rep.bottommost_compression_opts.level = level; opt->rep.bottommost_compression_opts.strategy
= strategy; @@ -2379,6 +2855,21 @@ opt->rep.bottommost_compression_opts.enabled = enabled; } +void rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes( + rocksdb_options_t* opt, int zstd_max_train_bytes, unsigned char enabled) { + opt->rep.bottommost_compression_opts.zstd_max_train_bytes = + zstd_max_train_bytes; + opt->rep.bottommost_compression_opts.enabled = enabled; +} + +void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes( + rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes, + unsigned char enabled) { + opt->rep.bottommost_compression_opts.max_dict_buffer_bytes = + max_dict_buffer_bytes; + opt->rep.bottommost_compression_opts.enabled = enabled; +} + void rocksdb_options_set_compression_options(rocksdb_options_t* opt, int w_bits, int level, int strategy, int max_dict_bytes) { @@ -2388,6 +2879,36 @@ opt->rep.compression_opts.max_dict_bytes = max_dict_bytes; } +void rocksdb_options_set_compression_options_zstd_max_train_bytes( + rocksdb_options_t* opt, int zstd_max_train_bytes) { + opt->rep.compression_opts.zstd_max_train_bytes = zstd_max_train_bytes; +} + +int rocksdb_options_get_compression_options_zstd_max_train_bytes( + rocksdb_options_t* opt) { + return opt->rep.compression_opts.zstd_max_train_bytes; +} + +void rocksdb_options_set_compression_options_parallel_threads( + rocksdb_options_t* opt, int value) { + opt->rep.compression_opts.parallel_threads = value; +} + +int rocksdb_options_get_compression_options_parallel_threads( + rocksdb_options_t* opt) { + return opt->rep.compression_opts.parallel_threads; +} + +void rocksdb_options_set_compression_options_max_dict_buffer_bytes( + rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes) { + opt->rep.compression_opts.max_dict_buffer_bytes = max_dict_buffer_bytes; +} + +uint64_t rocksdb_options_get_compression_options_max_dict_buffer_bytes( + rocksdb_options_t* opt) { + return opt->rep.compression_opts.max_dict_buffer_bytes; +} + void rocksdb_options_set_prefix_extractor( rocksdb_options_t* opt, rocksdb_slicetransform_t* prefix_extractor) { opt->rep.prefix_extractor.reset(prefix_extractor); @@ -2398,6 +2919,10 @@ opt->rep.use_fsync = use_fsync; } +int rocksdb_options_get_use_fsync(rocksdb_options_t* opt) { + return opt->rep.use_fsync; +} + void rocksdb_options_set_db_log_dir( rocksdb_options_t* opt, const char* db_log_dir) { opt->rep.db_log_dir = db_log_dir; @@ -2412,16 +2937,28 @@ opt->rep.WAL_ttl_seconds = ttl; } +uint64_t rocksdb_options_get_WAL_ttl_seconds(rocksdb_options_t* opt) { + return opt->rep.WAL_ttl_seconds; +} + void rocksdb_options_set_WAL_size_limit_MB( rocksdb_options_t* opt, uint64_t limit) { opt->rep.WAL_size_limit_MB = limit; } +uint64_t rocksdb_options_get_WAL_size_limit_MB(rocksdb_options_t* opt) { + return opt->rep.WAL_size_limit_MB; +} + void rocksdb_options_set_manifest_preallocation_size( rocksdb_options_t* opt, size_t v) { opt->rep.manifest_preallocation_size = v; } +size_t rocksdb_options_get_manifest_preallocation_size(rocksdb_options_t* opt) { + return opt->rep.manifest_preallocation_size; +} + // noop void rocksdb_options_set_purge_redundant_kvs_while_flush( rocksdb_options_t* /*opt*/, unsigned char /*v*/) {} @@ -2431,41 +2968,91 @@ opt->rep.use_direct_reads = v; } +unsigned char rocksdb_options_get_use_direct_reads(rocksdb_options_t* opt) { + return opt->rep.use_direct_reads; +} + void rocksdb_options_set_use_direct_io_for_flush_and_compaction( rocksdb_options_t* opt, unsigned char v) { opt->rep.use_direct_io_for_flush_and_compaction = v; } +unsigned char 
rocksdb_options_get_use_direct_io_for_flush_and_compaction( + rocksdb_options_t* opt) { + return opt->rep.use_direct_io_for_flush_and_compaction; +} + void rocksdb_options_set_allow_mmap_reads( rocksdb_options_t* opt, unsigned char v) { opt->rep.allow_mmap_reads = v; } +unsigned char rocksdb_options_get_allow_mmap_reads(rocksdb_options_t* opt) { + return opt->rep.allow_mmap_reads; +} + void rocksdb_options_set_allow_mmap_writes( rocksdb_options_t* opt, unsigned char v) { opt->rep.allow_mmap_writes = v; } +unsigned char rocksdb_options_get_allow_mmap_writes(rocksdb_options_t* opt) { + return opt->rep.allow_mmap_writes; +} + void rocksdb_options_set_is_fd_close_on_exec( rocksdb_options_t* opt, unsigned char v) { opt->rep.is_fd_close_on_exec = v; } +unsigned char rocksdb_options_get_is_fd_close_on_exec(rocksdb_options_t* opt) { + return opt->rep.is_fd_close_on_exec; +} + void rocksdb_options_set_skip_log_error_on_recovery( rocksdb_options_t* opt, unsigned char v) { opt->rep.skip_log_error_on_recovery = v; } +unsigned char rocksdb_options_get_skip_log_error_on_recovery( + rocksdb_options_t* opt) { + return opt->rep.skip_log_error_on_recovery; +} + void rocksdb_options_set_stats_dump_period_sec( rocksdb_options_t* opt, unsigned int v) { opt->rep.stats_dump_period_sec = v; } +unsigned int rocksdb_options_get_stats_dump_period_sec(rocksdb_options_t* opt) { + return opt->rep.stats_dump_period_sec; +} + +void rocksdb_options_set_stats_persist_period_sec(rocksdb_options_t* opt, + unsigned int v) { + opt->rep.stats_persist_period_sec = v; +} + +unsigned int rocksdb_options_get_stats_persist_period_sec( + rocksdb_options_t* opt) { + return opt->rep.stats_persist_period_sec; +} + void rocksdb_options_set_advise_random_on_open( rocksdb_options_t* opt, unsigned char v) { opt->rep.advise_random_on_open = v; } +unsigned char rocksdb_options_get_advise_random_on_open( + rocksdb_options_t* opt) { + return opt->rep.advise_random_on_open; +} + +void rocksdb_options_set_experimental_mempurge_threshold(rocksdb_options_t* opt, + double v) { + opt->rep.experimental_mempurge_threshold = v; +} + void rocksdb_options_set_access_hint_on_compaction_start( rocksdb_options_t* opt, int v) { switch(v) { @@ -2485,142 +3072,276 @@ opt->rep.access_hint_on_compaction_start = ROCKSDB_NAMESPACE::Options::WILLNEED; break; + default: + assert(0); } } +int rocksdb_options_get_access_hint_on_compaction_start( + rocksdb_options_t* opt) { + return opt->rep.access_hint_on_compaction_start; +} + void rocksdb_options_set_use_adaptive_mutex( rocksdb_options_t* opt, unsigned char v) { opt->rep.use_adaptive_mutex = v; } +unsigned char rocksdb_options_get_use_adaptive_mutex(rocksdb_options_t* opt) { + return opt->rep.use_adaptive_mutex; +} + void rocksdb_options_set_wal_bytes_per_sync( rocksdb_options_t* opt, uint64_t v) { opt->rep.wal_bytes_per_sync = v; } +uint64_t rocksdb_options_get_wal_bytes_per_sync(rocksdb_options_t* opt) { + return opt->rep.wal_bytes_per_sync; +} + void rocksdb_options_set_bytes_per_sync( rocksdb_options_t* opt, uint64_t v) { opt->rep.bytes_per_sync = v; } +uint64_t rocksdb_options_get_bytes_per_sync(rocksdb_options_t* opt) { + return opt->rep.bytes_per_sync; +} + void rocksdb_options_set_writable_file_max_buffer_size(rocksdb_options_t* opt, uint64_t v) { opt->rep.writable_file_max_buffer_size = static_cast<size_t>(v); } +uint64_t rocksdb_options_get_writable_file_max_buffer_size( + rocksdb_options_t* opt) { + return opt->rep.writable_file_max_buffer_size; +} + void
rocksdb_options_set_allow_concurrent_memtable_write(rocksdb_options_t* opt, unsigned char v) { opt->rep.allow_concurrent_memtable_write = v; } +unsigned char rocksdb_options_get_allow_concurrent_memtable_write( + rocksdb_options_t* opt) { + return opt->rep.allow_concurrent_memtable_write; +} + void rocksdb_options_set_enable_write_thread_adaptive_yield( rocksdb_options_t* opt, unsigned char v) { opt->rep.enable_write_thread_adaptive_yield = v; } +unsigned char rocksdb_options_get_enable_write_thread_adaptive_yield( + rocksdb_options_t* opt) { + return opt->rep.enable_write_thread_adaptive_yield; +} + void rocksdb_options_set_max_sequential_skip_in_iterations( rocksdb_options_t* opt, uint64_t v) { opt->rep.max_sequential_skip_in_iterations = v; } +uint64_t rocksdb_options_get_max_sequential_skip_in_iterations( + rocksdb_options_t* opt) { + return opt->rep.max_sequential_skip_in_iterations; +} + void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t* opt, int n) { opt->rep.max_write_buffer_number = n; } +int rocksdb_options_get_max_write_buffer_number(rocksdb_options_t* opt) { + return opt->rep.max_write_buffer_number; +} + void rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t* opt, int n) { opt->rep.min_write_buffer_number_to_merge = n; } +int rocksdb_options_get_min_write_buffer_number_to_merge( + rocksdb_options_t* opt) { + return opt->rep.min_write_buffer_number_to_merge; +} + void rocksdb_options_set_max_write_buffer_number_to_maintain( rocksdb_options_t* opt, int n) { opt->rep.max_write_buffer_number_to_maintain = n; } +int rocksdb_options_get_max_write_buffer_number_to_maintain( + rocksdb_options_t* opt) { + return opt->rep.max_write_buffer_number_to_maintain; +} + void rocksdb_options_set_max_write_buffer_size_to_maintain( rocksdb_options_t* opt, int64_t n) { opt->rep.max_write_buffer_size_to_maintain = n; } +int64_t rocksdb_options_get_max_write_buffer_size_to_maintain( + rocksdb_options_t* opt) { + return opt->rep.max_write_buffer_size_to_maintain; +} + void rocksdb_options_set_enable_pipelined_write(rocksdb_options_t* opt, unsigned char v) { opt->rep.enable_pipelined_write = v; } +unsigned char rocksdb_options_get_enable_pipelined_write( + rocksdb_options_t* opt) { + return opt->rep.enable_pipelined_write; +} + void rocksdb_options_set_unordered_write(rocksdb_options_t* opt, unsigned char v) { opt->rep.unordered_write = v; } +unsigned char rocksdb_options_get_unordered_write(rocksdb_options_t* opt) { + return opt->rep.unordered_write; +} + void rocksdb_options_set_max_subcompactions(rocksdb_options_t* opt, uint32_t n) { opt->rep.max_subcompactions = n; } +uint32_t rocksdb_options_get_max_subcompactions(rocksdb_options_t* opt) { + return opt->rep.max_subcompactions; +} + void rocksdb_options_set_max_background_jobs(rocksdb_options_t* opt, int n) { opt->rep.max_background_jobs = n; } +int rocksdb_options_get_max_background_jobs(rocksdb_options_t* opt) { + return opt->rep.max_background_jobs; +} + void rocksdb_options_set_max_background_compactions(rocksdb_options_t* opt, int n) { opt->rep.max_background_compactions = n; } +int rocksdb_options_get_max_background_compactions(rocksdb_options_t* opt) { + return opt->rep.max_background_compactions; +} + void rocksdb_options_set_base_background_compactions(rocksdb_options_t* opt, int n) { opt->rep.base_background_compactions = n; } +int rocksdb_options_get_base_background_compactions(rocksdb_options_t* opt) { + return opt->rep.base_background_compactions; +} + void 
rocksdb_options_set_max_background_flushes(rocksdb_options_t* opt, int n) { opt->rep.max_background_flushes = n; } +int rocksdb_options_get_max_background_flushes(rocksdb_options_t* opt) { + return opt->rep.max_background_flushes; +} + void rocksdb_options_set_max_log_file_size(rocksdb_options_t* opt, size_t v) { opt->rep.max_log_file_size = v; } +size_t rocksdb_options_get_max_log_file_size(rocksdb_options_t* opt) { + return opt->rep.max_log_file_size; +} + void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t* opt, size_t v) { opt->rep.log_file_time_to_roll = v; } +size_t rocksdb_options_get_log_file_time_to_roll(rocksdb_options_t* opt) { + return opt->rep.log_file_time_to_roll; +} + void rocksdb_options_set_keep_log_file_num(rocksdb_options_t* opt, size_t v) { opt->rep.keep_log_file_num = v; } +size_t rocksdb_options_get_keep_log_file_num(rocksdb_options_t* opt) { + return opt->rep.keep_log_file_num; +} + void rocksdb_options_set_recycle_log_file_num(rocksdb_options_t* opt, size_t v) { opt->rep.recycle_log_file_num = v; } +size_t rocksdb_options_get_recycle_log_file_num(rocksdb_options_t* opt) { + return opt->rep.recycle_log_file_num; +} + void rocksdb_options_set_soft_rate_limit(rocksdb_options_t* opt, double v) { opt->rep.soft_rate_limit = v; } +double rocksdb_options_get_soft_rate_limit(rocksdb_options_t* opt) { + return opt->rep.soft_rate_limit; +} + void rocksdb_options_set_hard_rate_limit(rocksdb_options_t* opt, double v) { opt->rep.hard_rate_limit = v; } +double rocksdb_options_get_hard_rate_limit(rocksdb_options_t* opt) { + return opt->rep.hard_rate_limit; +} + void rocksdb_options_set_soft_pending_compaction_bytes_limit(rocksdb_options_t* opt, size_t v) { opt->rep.soft_pending_compaction_bytes_limit = v; } +size_t rocksdb_options_get_soft_pending_compaction_bytes_limit( + rocksdb_options_t* opt) { + return opt->rep.soft_pending_compaction_bytes_limit; +} + void rocksdb_options_set_hard_pending_compaction_bytes_limit(rocksdb_options_t* opt, size_t v) { opt->rep.hard_pending_compaction_bytes_limit = v; } +size_t rocksdb_options_get_hard_pending_compaction_bytes_limit( + rocksdb_options_t* opt) { + return opt->rep.hard_pending_compaction_bytes_limit; +} + void rocksdb_options_set_rate_limit_delay_max_milliseconds( rocksdb_options_t* opt, unsigned int v) { opt->rep.rate_limit_delay_max_milliseconds = v; } +unsigned int rocksdb_options_get_rate_limit_delay_max_milliseconds( + rocksdb_options_t* opt) { + return opt->rep.rate_limit_delay_max_milliseconds; +} + void rocksdb_options_set_max_manifest_file_size( rocksdb_options_t* opt, size_t v) { opt->rep.max_manifest_file_size = v; } +size_t rocksdb_options_get_max_manifest_file_size(rocksdb_options_t* opt) { + return opt->rep.max_manifest_file_size; +} + void rocksdb_options_set_table_cache_numshardbits( rocksdb_options_t* opt, int v) { opt->rep.table_cache_numshardbits = v; } +int rocksdb_options_get_table_cache_numshardbits(rocksdb_options_t* opt) { + return opt->rep.table_cache_numshardbits; +} + void rocksdb_options_set_table_cache_remove_scan_count_limit( rocksdb_options_t* /*opt*/, int /*v*/) { // this option is deprecated @@ -2631,19 +3352,38 @@ opt->rep.arena_block_size = v; } +size_t rocksdb_options_get_arena_block_size(rocksdb_options_t* opt) { + return opt->rep.arena_block_size; +} + void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t* opt, int disable) { opt->rep.disable_auto_compactions = disable; } +unsigned char rocksdb_options_get_disable_auto_compactions( + rocksdb_options_t* opt) { + return 
opt->rep.disable_auto_compactions; +} + void rocksdb_options_set_optimize_filters_for_hits(rocksdb_options_t* opt, int v) { opt->rep.optimize_filters_for_hits = v; } +unsigned char rocksdb_options_get_optimize_filters_for_hits( + rocksdb_options_t* opt) { + return opt->rep.optimize_filters_for_hits; +} + void rocksdb_options_set_delete_obsolete_files_period_micros( rocksdb_options_t* opt, uint64_t v) { opt->rep.delete_obsolete_files_period_micros = v; } +uint64_t rocksdb_options_get_delete_obsolete_files_period_micros( + rocksdb_options_t* opt) { + return opt->rep.delete_obsolete_files_period_micros; +} + void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t* opt) { opt->rep.PrepareForBulkLoad(); } @@ -2657,11 +3397,20 @@ opt->rep.memtable_prefix_bloom_size_ratio = v; } +double rocksdb_options_get_memtable_prefix_bloom_size_ratio( + rocksdb_options_t* opt) { + return opt->rep.memtable_prefix_bloom_size_ratio; +} + void rocksdb_options_set_memtable_huge_page_size(rocksdb_options_t* opt, size_t v) { opt->rep.memtable_huge_page_size = v; } +size_t rocksdb_options_get_memtable_huge_page_size(rocksdb_options_t* opt) { + return opt->rep.memtable_huge_page_size; +} + void rocksdb_options_set_hash_skip_list_rep( rocksdb_options_t *opt, size_t bucket_count, int32_t skiplist_height, int32_t skiplist_branching_factor) { @@ -2696,31 +3445,56 @@ opt->rep.max_successive_merges = v; } +size_t rocksdb_options_get_max_successive_merges(rocksdb_options_t* opt) { + return opt->rep.max_successive_merges; +} + void rocksdb_options_set_bloom_locality( rocksdb_options_t* opt, uint32_t v) { opt->rep.bloom_locality = v; } +uint32_t rocksdb_options_get_bloom_locality(rocksdb_options_t* opt) { + return opt->rep.bloom_locality; +} + void rocksdb_options_set_inplace_update_support( rocksdb_options_t* opt, unsigned char v) { opt->rep.inplace_update_support = v; } +unsigned char rocksdb_options_get_inplace_update_support( + rocksdb_options_t* opt) { + return opt->rep.inplace_update_support; +} + void rocksdb_options_set_inplace_update_num_locks( rocksdb_options_t* opt, size_t v) { opt->rep.inplace_update_num_locks = v; } +size_t rocksdb_options_get_inplace_update_num_locks(rocksdb_options_t* opt) { + return opt->rep.inplace_update_num_locks; +} + void rocksdb_options_set_report_bg_io_stats( rocksdb_options_t* opt, int v) { opt->rep.report_bg_io_stats = v; } +unsigned char rocksdb_options_get_report_bg_io_stats(rocksdb_options_t* opt) { + return opt->rep.report_bg_io_stats; +} + void rocksdb_options_set_compaction_style(rocksdb_options_t *opt, int style) { opt->rep.compaction_style = static_cast<ROCKSDB_NAMESPACE::CompactionStyle>(style); } +int rocksdb_options_get_compaction_style(rocksdb_options_t* opt) { + return opt->rep.compaction_style; +} + void rocksdb_options_set_universal_compaction_options(rocksdb_options_t *opt, rocksdb_universal_compaction_options_t *uco) { opt->rep.compaction_options_universal = *(uco->rep); } @@ -2750,6 +3524,19 @@ opt->rep.atomic_flush = atomic_flush; } +unsigned char rocksdb_options_get_atomic_flush(rocksdb_options_t* opt) { + return opt->rep.atomic_flush; +} + +void rocksdb_options_set_manual_wal_flush(rocksdb_options_t* opt, + unsigned char manual_wal_flush) { + opt->rep.manual_wal_flush = manual_wal_flush; +} + +unsigned char rocksdb_options_get_manual_wal_flush(rocksdb_options_t* opt) { + return opt->rep.manual_wal_flush; +} + rocksdb_ratelimiter_t* rocksdb_ratelimiter_create( int64_t rate_bytes_per_sec, int64_t refill_period_us, @@ -2771,6 +3558,14 @@ } } +void
rocksdb_options_add_compact_on_deletion_collector_factory( + rocksdb_options_t* opt, size_t window_size, size_t num_dels_trigger) { + std::shared_ptr<ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory> + compact_on_del = + NewCompactOnDeletionCollectorFactory(window_size, num_dels_trigger); + opt->rep.table_properties_collector_factories.emplace_back(compact_on_del); +} + void rocksdb_set_perf_level(int v) { PerfLevel level = static_cast<PerfLevel>(v); SetPerfLevel(level); @@ -3064,7 +3859,8 @@ delete filter; } -rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format(int bits_per_key, bool original_format) { +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format( + double bits_per_key, bool original_format) { // Make a rocksdb_filterpolicy_t, but override all of its methods so // they delegate to a NewBloomFilterPolicy() instead of user // supplied C functions. @@ -3099,14 +3895,63 @@ return wrapper; } -rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_full(int bits_per_key) { +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_full( + double bits_per_key) { return rocksdb_filterpolicy_create_bloom_format(bits_per_key, false); } -rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) { +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(double bits_per_key) { return rocksdb_filterpolicy_create_bloom_format(bits_per_key, true); } +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format( + double bloom_equivalent_bits_per_key, int bloom_before_level) { + // Make a rocksdb_filterpolicy_t, but override all of its methods so + // they delegate to a NewRibbonFilterPolicy() instead of user + // supplied C functions. + struct Wrapper : public rocksdb_filterpolicy_t { + const FilterPolicy* rep_; + ~Wrapper() override { delete rep_; } + const char* Name() const override { return rep_->Name(); } + void CreateFilter(const Slice* keys, int n, + std::string* dst) const override { + return rep_->CreateFilter(keys, n, dst); + } + bool KeyMayMatch(const Slice& key, const Slice& filter) const override { + return rep_->KeyMayMatch(key, filter); + } + ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( + const ROCKSDB_NAMESPACE::FilterBuildingContext& context) + const override { + return rep_->GetBuilderWithContext(context); + } + ROCKSDB_NAMESPACE::FilterBitsReader* GetFilterBitsReader( + const Slice& contents) const override { + return rep_->GetFilterBitsReader(contents); + } + static void DoNothing(void*) {} + }; + Wrapper* wrapper = new Wrapper; + wrapper->rep_ = + NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, bloom_before_level); + wrapper->state_ = nullptr; + wrapper->delete_filter_ = nullptr; + wrapper->destructor_ = &Wrapper::DoNothing; + return wrapper; +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon( + double bloom_equivalent_bits_per_key) { + return rocksdb_filterpolicy_create_ribbon_format( + bloom_equivalent_bits_per_key, /*bloom_before_level = disabled*/ -1); +} + +rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_hybrid( + double bloom_equivalent_bits_per_key, int bloom_before_level) { + return rocksdb_filterpolicy_create_ribbon_format( + bloom_equivalent_bits_per_key, bloom_before_level); +} + rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( void* state, void (*destructor)(void*), char* (*full_merge)(void*, const char* key, size_t key_length, @@ -3149,11 +3994,20 @@ opt->rep.verify_checksums = v; } +unsigned char rocksdb_readoptions_get_verify_checksums( + rocksdb_readoptions_t* opt) { + return opt->rep.verify_checksums; +} + void
rocksdb_readoptions_set_fill_cache( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.fill_cache = v; } +unsigned char rocksdb_readoptions_get_fill_cache(rocksdb_readoptions_t* opt) { + return opt->rep.fill_cache; +} + void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t* opt, const rocksdb_snapshot_t* snap) { @@ -3190,11 +4044,19 @@ opt->rep.read_tier = static_cast<ReadTier>(v); } +int rocksdb_readoptions_get_read_tier(rocksdb_readoptions_t* opt) { + return static_cast<int>(opt->rep.read_tier); +} + void rocksdb_readoptions_set_tailing( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.tailing = v; } +unsigned char rocksdb_readoptions_get_tailing(rocksdb_readoptions_t* opt) { + return opt->rep.tailing; +} + void rocksdb_readoptions_set_managed( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.managed = v; @@ -3205,37 +4067,89 @@ opt->rep.readahead_size = v; } +size_t rocksdb_readoptions_get_readahead_size(rocksdb_readoptions_t* opt) { + return opt->rep.readahead_size; +} + void rocksdb_readoptions_set_prefix_same_as_start( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.prefix_same_as_start = v; } +unsigned char rocksdb_readoptions_get_prefix_same_as_start( + rocksdb_readoptions_t* opt) { + return opt->rep.prefix_same_as_start; +} + void rocksdb_readoptions_set_pin_data(rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.pin_data = v; } +unsigned char rocksdb_readoptions_get_pin_data(rocksdb_readoptions_t* opt) { + return opt->rep.pin_data; +} + void rocksdb_readoptions_set_total_order_seek(rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.total_order_seek = v; } +unsigned char rocksdb_readoptions_get_total_order_seek( + rocksdb_readoptions_t* opt) { + return opt->rep.total_order_seek; +} + void rocksdb_readoptions_set_max_skippable_internal_keys( rocksdb_readoptions_t* opt, uint64_t v) { opt->rep.max_skippable_internal_keys = v; } +uint64_t rocksdb_readoptions_get_max_skippable_internal_keys( + rocksdb_readoptions_t* opt) { + return opt->rep.max_skippable_internal_keys; +} + void rocksdb_readoptions_set_background_purge_on_iterator_cleanup( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.background_purge_on_iterator_cleanup = v; } +unsigned char rocksdb_readoptions_get_background_purge_on_iterator_cleanup( + rocksdb_readoptions_t* opt) { + return opt->rep.background_purge_on_iterator_cleanup; +} + void rocksdb_readoptions_set_ignore_range_deletions( rocksdb_readoptions_t* opt, unsigned char v) { opt->rep.ignore_range_deletions = v; } +unsigned char rocksdb_readoptions_get_ignore_range_deletions( + rocksdb_readoptions_t* opt) { + return opt->rep.ignore_range_deletions; +} + +void rocksdb_readoptions_set_deadline(rocksdb_readoptions_t* opt, + uint64_t microseconds) { + opt->rep.deadline = std::chrono::microseconds(microseconds); +} + +uint64_t rocksdb_readoptions_get_deadline(rocksdb_readoptions_t* opt) { + return opt->rep.deadline.count(); +} + +void rocksdb_readoptions_set_io_timeout(rocksdb_readoptions_t* opt, + uint64_t microseconds) { + opt->rep.io_timeout = std::chrono::microseconds(microseconds); +} + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_readoptions_get_io_timeout(rocksdb_readoptions_t* opt) { + return opt->rep.io_timeout.count(); +} + rocksdb_writeoptions_t* rocksdb_writeoptions_create() { return new rocksdb_writeoptions_t; } @@ -3249,33 +4163,61 @@ opt->rep.sync = v; } +unsigned char rocksdb_writeoptions_get_sync(rocksdb_writeoptions_t* opt) { + return opt->rep.sync; +} + void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t*
opt, int disable) { opt->rep.disableWAL = disable; } +unsigned char rocksdb_writeoptions_get_disable_WAL( + rocksdb_writeoptions_t* opt) { + return opt->rep.disableWAL; +} + void rocksdb_writeoptions_set_ignore_missing_column_families( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.ignore_missing_column_families = v; } +unsigned char rocksdb_writeoptions_get_ignore_missing_column_families( + rocksdb_writeoptions_t* opt) { + return opt->rep.ignore_missing_column_families; +} + void rocksdb_writeoptions_set_no_slowdown( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.no_slowdown = v; } +unsigned char rocksdb_writeoptions_get_no_slowdown( + rocksdb_writeoptions_t* opt) { + return opt->rep.no_slowdown; +} + void rocksdb_writeoptions_set_low_pri( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.low_pri = v; } +unsigned char rocksdb_writeoptions_get_low_pri(rocksdb_writeoptions_t* opt) { + return opt->rep.low_pri; +} + void rocksdb_writeoptions_set_memtable_insert_hint_per_batch( rocksdb_writeoptions_t* opt, unsigned char v) { opt->rep.memtable_insert_hint_per_batch = v; } +unsigned char rocksdb_writeoptions_get_memtable_insert_hint_per_batch( + rocksdb_writeoptions_t* opt) { + return opt->rep.memtable_insert_hint_per_batch; +} + rocksdb_compactoptions_t* rocksdb_compactoptions_create() { return new rocksdb_compactoptions_t; } @@ -3289,21 +4231,40 @@ opt->rep.bottommost_level_compaction = static_cast<BottommostLevelCompaction>(v); } +unsigned char rocksdb_compactoptions_get_bottommost_level_compaction( + rocksdb_compactoptions_t* opt) { + return static_cast<unsigned char>(opt->rep.bottommost_level_compaction); +} + void rocksdb_compactoptions_set_exclusive_manual_compaction( rocksdb_compactoptions_t* opt, unsigned char v) { opt->rep.exclusive_manual_compaction = v; } +unsigned char rocksdb_compactoptions_get_exclusive_manual_compaction( + rocksdb_compactoptions_t* opt) { + return opt->rep.exclusive_manual_compaction; +} + void rocksdb_compactoptions_set_change_level(rocksdb_compactoptions_t* opt, unsigned char v) { opt->rep.change_level = v; } +unsigned char rocksdb_compactoptions_get_change_level( + rocksdb_compactoptions_t* opt) { + return opt->rep.change_level; +} + void rocksdb_compactoptions_set_target_level(rocksdb_compactoptions_t* opt, int n) { opt->rep.target_level = n; } +int rocksdb_compactoptions_get_target_level(rocksdb_compactoptions_t* opt) { + return opt->rep.target_level; +} + rocksdb_flushoptions_t* rocksdb_flushoptions_create() { return new rocksdb_flushoptions_t; } @@ -3317,20 +4278,70 @@ opt->rep.wait = v; } +unsigned char rocksdb_flushoptions_get_wait(rocksdb_flushoptions_t* opt) { + return opt->rep.wait; +} + +rocksdb_memory_allocator_t* rocksdb_jemalloc_nodump_allocator_create( + char** errptr) { + rocksdb_memory_allocator_t* allocator = new rocksdb_memory_allocator_t; + ROCKSDB_NAMESPACE::JemallocAllocatorOptions options; + SaveError(errptr, ROCKSDB_NAMESPACE::NewJemallocNodumpAllocator( + options, &allocator->rep)); + return allocator; +} + +void rocksdb_memory_allocator_destroy(rocksdb_memory_allocator_t* allocator) { + delete allocator; +} + +rocksdb_lru_cache_options_t* rocksdb_lru_cache_options_create() { + return new rocksdb_lru_cache_options_t; +} + +void rocksdb_lru_cache_options_destroy(rocksdb_lru_cache_options_t* opt) { + delete opt; +} + +void rocksdb_lru_cache_options_set_capacity(rocksdb_lru_cache_options_t* opt, + size_t capacity) { + opt->rep.capacity = capacity; +} + +void rocksdb_lru_cache_options_set_memory_allocator( + rocksdb_lru_cache_options_t* opt,
rocksdb_memory_allocator_t* allocator) { + opt->rep.memory_allocator = allocator->rep; +} + rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) { rocksdb_cache_t* c = new rocksdb_cache_t; c->rep = NewLRUCache(capacity); return c; } +rocksdb_cache_t* rocksdb_cache_create_lru_opts( + rocksdb_lru_cache_options_t* opt) { + rocksdb_cache_t* c = new rocksdb_cache_t; + c->rep = NewLRUCache(opt->rep); + return c; +} + void rocksdb_cache_destroy(rocksdb_cache_t* cache) { delete cache; } +void rocksdb_cache_disown_data(rocksdb_cache_t* cache) { + cache->rep->DisownData(); +} + void rocksdb_cache_set_capacity(rocksdb_cache_t* cache, size_t capacity) { cache->rep->SetCapacity(capacity); } +size_t rocksdb_cache_get_capacity(rocksdb_cache_t* cache) { + return cache->rep->GetCapacity(); +} + size_t rocksdb_cache_get_usage(rocksdb_cache_t* cache) { return cache->rep->GetUsage(); } @@ -3368,10 +4379,36 @@ env->rep->SetBackgroundThreads(n); } +int rocksdb_env_get_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(); +} + +void rocksdb_env_set_bottom_priority_background_threads(rocksdb_env_t* env, + int n) { + env->rep->SetBackgroundThreads(n, Env::BOTTOM); +} + +int rocksdb_env_get_bottom_priority_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(Env::BOTTOM); +} + void rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, int n) { env->rep->SetBackgroundThreads(n, Env::HIGH); } +int rocksdb_env_get_high_priority_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(Env::HIGH); +} + +void rocksdb_env_set_low_priority_background_threads(rocksdb_env_t* env, + int n) { + env->rep->SetBackgroundThreads(n, Env::LOW); +} + +int rocksdb_env_get_low_priority_background_threads(rocksdb_env_t* env) { + return env->rep->GetBackgroundThreads(Env::LOW); +} + void rocksdb_env_join_all_threads(rocksdb_env_t* env) { env->rep->WaitForJoin(); } @@ -3558,10 +4595,11 @@ delete st; } -struct Wrapper : public rocksdb_slicetransform_t { +struct SliceTransformWrapper : public rocksdb_slicetransform_t { const SliceTransform* rep_; - ~Wrapper() override { delete rep_; } + ~SliceTransformWrapper() override { delete rep_; } const char* Name() const override { return rep_->Name(); } + std::string GetId() const override { return rep_->GetId(); } Slice Transform(const Slice& src) const override { return rep_->Transform(src); } @@ -3573,18 +4611,18 @@ }; rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t prefixLen) { - Wrapper* wrapper = new Wrapper; + SliceTransformWrapper* wrapper = new SliceTransformWrapper; wrapper->rep_ = ROCKSDB_NAMESPACE::NewFixedPrefixTransform(prefixLen); wrapper->state_ = nullptr; - wrapper->destructor_ = &Wrapper::DoNothing; + wrapper->destructor_ = &SliceTransformWrapper::DoNothing; return wrapper; } rocksdb_slicetransform_t* rocksdb_slicetransform_create_noop() { - Wrapper* wrapper = new Wrapper; + SliceTransformWrapper* wrapper = new SliceTransformWrapper; wrapper->rep_ = ROCKSDB_NAMESPACE::NewNoopTransform(); wrapper->state_ = nullptr; - wrapper->destructor_ = &Wrapper::DoNothing; + wrapper->destructor_ = &SliceTransformWrapper::DoNothing; return wrapper; } @@ -3599,32 +4637,62 @@ uco->rep->size_ratio = ratio; } +int rocksdb_universal_compaction_options_get_size_ratio( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->size_ratio; +} + void rocksdb_universal_compaction_options_set_min_merge_width( rocksdb_universal_compaction_options_t* uco, int w) { 
uco->rep->min_merge_width = w; } +int rocksdb_universal_compaction_options_get_min_merge_width( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->min_merge_width; +} + void rocksdb_universal_compaction_options_set_max_merge_width( rocksdb_universal_compaction_options_t* uco, int w) { uco->rep->max_merge_width = w; } +int rocksdb_universal_compaction_options_get_max_merge_width( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->max_merge_width; +} + void rocksdb_universal_compaction_options_set_max_size_amplification_percent( rocksdb_universal_compaction_options_t* uco, int p) { uco->rep->max_size_amplification_percent = p; } +int rocksdb_universal_compaction_options_get_max_size_amplification_percent( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->max_size_amplification_percent; +} + void rocksdb_universal_compaction_options_set_compression_size_percent( rocksdb_universal_compaction_options_t* uco, int p) { uco->rep->compression_size_percent = p; } +int rocksdb_universal_compaction_options_get_compression_size_percent( + rocksdb_universal_compaction_options_t* uco) { + return uco->rep->compression_size_percent; +} + void rocksdb_universal_compaction_options_set_stop_style( rocksdb_universal_compaction_options_t* uco, int style) { uco->rep->stop_style = static_cast<ROCKSDB_NAMESPACE::CompactionStopStyle>(style); } +int rocksdb_universal_compaction_options_get_stop_style( + rocksdb_universal_compaction_options_t* uco) { + return static_cast<int>(uco->rep->stop_style); +} + void rocksdb_universal_compaction_options_destroy( rocksdb_universal_compaction_options_t* uco) { delete uco->rep; @@ -3642,6 +4710,11 @@ fifo_opts->rep.max_table_files_size = size; } +uint64_t rocksdb_fifo_compaction_options_get_max_table_files_size( + rocksdb_fifo_compaction_options_t* fifo_opts) { + return fifo_opts->rep.max_table_files_size; +} + void rocksdb_fifo_compaction_options_destroy( rocksdb_fifo_compaction_options_t* fifo_opts) { delete fifo_opts; @@ -3665,6 +4738,11 @@ return static_cast<int>(lf->rep.size()); } +const char* rocksdb_livefiles_column_family_name(const rocksdb_livefiles_t* lf, + int index) { + return lf->rep[index].column_family_name.c_str(); +} + const char* rocksdb_livefiles_name( const rocksdb_livefiles_t* lf, int index) { @@ -3831,6 +4909,27 @@ opt->rep.set_snapshot = v; } +char* rocksdb_optimistictransactiondb_property_value( + rocksdb_optimistictransactiondb_t* db, const char* propname) { + std::string tmp; + if (db->rep->GetProperty(Slice(propname), &tmp)) { + // We use strdup() since we expect human readable output. + return strdup(tmp.c_str()); + } else { + return nullptr; + } +} + +int rocksdb_optimistictransactiondb_property_int( + rocksdb_optimistictransactiondb_t* db, const char* propname, + uint64_t* out_val) { + if (db->rep->GetIntProperty(Slice(propname), out_val)) { + return 0; + } else { + return -1; + } +} + rocksdb_column_family_handle_t* rocksdb_transactiondb_create_column_family( rocksdb_transactiondb_t* txn_db, const rocksdb_options_t* column_family_options, @@ -3901,6 +5000,27 @@ delete snapshot; } +char* rocksdb_transactiondb_property_value(rocksdb_transactiondb_t* db, + const char* propname) { + std::string tmp; + if (db->rep->GetProperty(Slice(propname), &tmp)) { + // We use strdup() since we expect human readable output.
+ return strdup(tmp.c_str()); + } else { + return nullptr; + } +} + +int rocksdb_transactiondb_property_int(rocksdb_transactiondb_t* db, + const char* propname, + uint64_t* out_val) { + if (db->rep->GetIntProperty(Slice(propname), out_val)) { + return 0; + } else { + return -1; + } +} + rocksdb_transaction_t* rocksdb_transaction_begin( rocksdb_transactiondb_t* txn_db, const rocksdb_writeoptions_t* write_options, @@ -3940,7 +5060,10 @@ const rocksdb_snapshot_t* rocksdb_transaction_get_snapshot( rocksdb_transaction_t* txn) { - rocksdb_snapshot_t* result = new rocksdb_snapshot_t; + // This will be freed later on using free, so use malloc here to avoid a + // mismatch + rocksdb_snapshot_t* result = + (rocksdb_snapshot_t*)malloc(sizeof(rocksdb_snapshot_t)); result->rep = txn->rep->GetSnapshot(); return result; } @@ -4300,12 +5423,31 @@ return old_txn; } +// Write batch into OptimisticTransactionDB +void rocksdb_optimistictransactiondb_write( + rocksdb_optimistictransactiondb_t* otxn_db, + const rocksdb_writeoptions_t* options, rocksdb_writebatch_t* batch, + char** errptr) { + SaveError(errptr, otxn_db->rep->Write(options->rep, &batch->rep)); +} + void rocksdb_optimistictransactiondb_close( rocksdb_optimistictransactiondb_t* otxn_db) { delete otxn_db->rep; delete otxn_db; } +rocksdb_checkpoint_t* rocksdb_optimistictransactiondb_checkpoint_object_create( + rocksdb_optimistictransactiondb_t* otxn_db, char** errptr) { + Checkpoint* checkpoint; + if (SaveError(errptr, Checkpoint::Create(otxn_db->rep, &checkpoint))) { + return nullptr; + } + rocksdb_checkpoint_t* result = new rocksdb_checkpoint_t; + result->rep = checkpoint; + return result; +} + void rocksdb_free(void* ptr) { free(ptr); } rocksdb_pinnableslice_t* rocksdb_get_pinned( @@ -4441,11 +5583,25 @@ return memory_usage->cache_total; } +void rocksdb_options_set_dump_malloc_stats(rocksdb_options_t* opt, + unsigned char val) { + opt->rep.dump_malloc_stats = val; +} + +void rocksdb_options_set_memtable_whole_key_filtering(rocksdb_options_t* opt, + unsigned char val) { + opt->rep.memtable_whole_key_filtering = val; +} + // deletes container with memory usage estimates void rocksdb_approximate_memory_usage_destroy(rocksdb_memory_usage_t* usage) { delete usage; } +void rocksdb_cancel_all_background_work(rocksdb_t* db, unsigned char wait) { + CancelAllBackgroundWork(db->rep, wait); +} + } // end extern "C" #endif // !ROCKSDB_LITE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/c_test.c mariadb-10.11.13/storage/rocksdb/rocksdb/db/c_test.c --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/c_test.c 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/c_test.c 2025-05-19 16:14:27.000000000 +0000 @@ -7,12 +7,13 @@ #ifndef ROCKSDB_LITE // Lite does not support C API -#include "rocksdb/c.h" - +#include <assert.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> + +#include "rocksdb/c.h" #ifndef OS_WIN #include <unistd.h> #endif @@ -58,7 +59,11 @@ static const char* GetTempDir(void) { const char* ret = getenv("TEST_TMPDIR"); if (ret == NULL || ret[0] == '\0') +#ifdef OS_WIN + ret = getenv("TEMP"); +#else - ret = "/tmp"; + ret = "/tmp"; +#endif return ret; } #ifdef _MSC_VER @@ -85,10 +90,8 @@ // ok return; } else { - fprintf(stderr, "%s: expected '%s', got '%s'\n", - phase, - (expected ? expected : "(null)"), - (v ? v : "(null")); + fprintf(stderr, "%s: expected '%s', got '%s'\n", phase, + (expected ? expected : "(null)"), (v ?
v : "(null)")); abort(); } } @@ -513,6 +516,9 @@ coptions = rocksdb_compactoptions_create(); rocksdb_compactoptions_set_exclusive_manual_compaction(coptions, 1); + rocksdb_options_add_compact_on_deletion_collector_factory(options, 10000, + 10001); + StartPhase("destroy"); rocksdb_destroy_db(options, dbname, &err); Free(&err); @@ -984,7 +990,9 @@ &err); CheckNoError(err); } - rocksdb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes); + rocksdb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes, + &err); + CheckNoError(err); CheckCondition(sizes[0] > 0); CheckCondition(sizes[1] > 0); } @@ -1010,7 +1018,36 @@ CheckGet(db, roptions, "foo", NULL); rocksdb_release_snapshot(db, snap); } - + StartPhase("snapshot_with_memtable_inplace_update"); + { + rocksdb_close(db); + const rocksdb_snapshot_t* snap = NULL; + const char* s_key = "foo_snap"; + const char* value1 = "hello_s1"; + const char* value2 = "hello_s2"; + rocksdb_options_set_allow_concurrent_memtable_write(options, 0); + rocksdb_options_set_inplace_update_support(options, 1); + rocksdb_options_set_error_if_exists(options, 0); + db = rocksdb_open(options, dbname, &err); + CheckNoError(err); + rocksdb_put(db, woptions, s_key, 8, value1, 8, &err); + snap = rocksdb_create_snapshot(db); + assert(snap != NULL); + rocksdb_put(db, woptions, s_key, 8, value2, 8, &err); + CheckNoError(err); + rocksdb_readoptions_set_snapshot(roptions, snap); + CheckGet(db, roptions, "foo", NULL); + // snapshot syntax is invalid, because of inplace update supported is set + CheckGet(db, roptions, s_key, value2); + // restore the data and options + rocksdb_delete(db, woptions, s_key, 8, &err); + CheckGet(db, roptions, s_key, NULL); + rocksdb_release_snapshot(db, snap); + rocksdb_readoptions_set_snapshot(roptions, NULL); + rocksdb_options_set_inplace_update_support(options, 0); + rocksdb_options_set_allow_concurrent_memtable_write(options, 1); + rocksdb_options_set_error_if_exists(options, 1); + } StartPhase("repair"); { // If we do not compact here, then the lazy deletion of @@ -1034,19 +1071,25 @@ } StartPhase("filter"); - for (run = 0; run <= 2; run++) { - // First run uses custom filter - // Second run uses old block-based bloom filter - // Third run uses full bloom filter + for (run = 0; run <= 4; run++) { + // run=0 uses custom filter + // run=1 uses old block-based bloom filter + // run=2 run uses full bloom filter + // run=3 uses Ribbon + // run=4 uses Ribbon-Bloom hybrid configuration CheckNoError(err); rocksdb_filterpolicy_t* policy; if (run == 0) { policy = rocksdb_filterpolicy_create(NULL, FilterDestroy, FilterCreate, FilterKeyMatch, NULL, FilterName); } else if (run == 1) { - policy = rocksdb_filterpolicy_create_bloom(8); + policy = rocksdb_filterpolicy_create_bloom(8.0); + } else if (run == 2) { + policy = rocksdb_filterpolicy_create_bloom_full(8.0); + } else if (run == 3) { + policy = rocksdb_filterpolicy_create_ribbon(8.0); } else { - policy = rocksdb_filterpolicy_create_bloom_full(8); + policy = rocksdb_filterpolicy_create_ribbon_hybrid(8.0, 1); } rocksdb_block_based_options_set_filter_policy(table_options, policy); @@ -1112,10 +1155,12 @@ } else if (run == 1) { // Essentially a fingerprint of the block-based Bloom schema CheckCondition(hits == 241); + } else if (run == 2 || run == 4) { + // Essentially a fingerprint of full Bloom schema, format_version=5 + CheckCondition(hits == 188); } else { - // Essentially a fingerprint of the full Bloom schema(s), - // format_version < 5, which vary for three different 
CACHE_LINE_SIZEs - CheckCondition(hits == 224 || hits == 180 || hits == 125); + // Essentially a fingerprint of Ribbon schema + CheckCondition(hits == 226); } CheckCondition( (keys_to_query - hits) == @@ -1271,6 +1316,9 @@ CheckPinGetCF(db, roptions, handles[1], "box", "c"); rocksdb_writebatch_destroy(wb); + rocksdb_flush_wal(db, 1, &err); + CheckNoError(err); + const char* keys[3] = { "box", "box", "barfooxx" }; const rocksdb_column_family_handle_t* get_handles[3] = { handles[0], handles[1], handles[1] }; const size_t keys_sizes[3] = { 3, 3, 8 }; @@ -1296,6 +1344,29 @@ Free(&vals[i]); } + { + unsigned char value_found = 0; + + CheckCondition(!rocksdb_key_may_exist(db, roptions, "invalid_key", 11, + NULL, NULL, NULL, 0, NULL)); + CheckCondition(!rocksdb_key_may_exist(db, roptions, "invalid_key", 11, + &vals[0], &vals_sizes[0], NULL, 0, + &value_found)); + if (value_found) { + Free(&vals[0]); + } + + CheckCondition(!rocksdb_key_may_exist_cf(db, roptions, handles[1], + "invalid_key", 11, NULL, NULL, + NULL, 0, NULL)); + CheckCondition(!rocksdb_key_may_exist_cf(db, roptions, handles[1], + "invalid_key", 11, &vals[0], + &vals_sizes[0], NULL, 0, NULL)); + if (value_found) { + Free(&vals[0]); + } + } + rocksdb_iterator_t* iter = rocksdb_create_iterator_cf(db, roptions, handles[1]); CheckCondition(!rocksdb_iter_valid(iter)); rocksdb_iter_seek_to_first(iter); @@ -1461,6 +1532,1079 @@ rocksdb_cuckoo_options_destroy(cuckoo_options); } + StartPhase("options"); + { + rocksdb_options_t* o; + o = rocksdb_options_create(); + + // Set and check options. + rocksdb_options_set_allow_ingest_behind(o, 1); + CheckCondition(1 == rocksdb_options_get_allow_ingest_behind(o)); + + rocksdb_options_compaction_readahead_size(o, 10); + CheckCondition(10 == rocksdb_options_get_compaction_readahead_size(o)); + + rocksdb_options_set_create_if_missing(o, 1); + CheckCondition(1 == rocksdb_options_get_create_if_missing(o)); + + rocksdb_options_set_create_missing_column_families(o, 1); + CheckCondition(1 == rocksdb_options_get_create_missing_column_families(o)); + + rocksdb_options_set_error_if_exists(o, 1); + CheckCondition(1 == rocksdb_options_get_error_if_exists(o)); + + rocksdb_options_set_paranoid_checks(o, 1); + CheckCondition(1 == rocksdb_options_get_paranoid_checks(o)); + + rocksdb_options_set_info_log_level(o, 3); + CheckCondition(3 == rocksdb_options_get_info_log_level(o)); + + rocksdb_options_set_write_buffer_size(o, 100); + CheckCondition(100 == rocksdb_options_get_write_buffer_size(o)); + + rocksdb_options_set_db_write_buffer_size(o, 1000); + CheckCondition(1000 == rocksdb_options_get_db_write_buffer_size(o)); + + rocksdb_options_set_max_open_files(o, 21); + CheckCondition(21 == rocksdb_options_get_max_open_files(o)); + + rocksdb_options_set_max_file_opening_threads(o, 5); + CheckCondition(5 == rocksdb_options_get_max_file_opening_threads(o)); + + rocksdb_options_set_max_total_wal_size(o, 400); + CheckCondition(400 == rocksdb_options_get_max_total_wal_size(o)); + + rocksdb_options_set_num_levels(o, 7); + CheckCondition(7 == rocksdb_options_get_num_levels(o)); + + rocksdb_options_set_level0_file_num_compaction_trigger(o, 4); + CheckCondition(4 == + rocksdb_options_get_level0_file_num_compaction_trigger(o)); + + rocksdb_options_set_level0_slowdown_writes_trigger(o, 6); + CheckCondition(6 == rocksdb_options_get_level0_slowdown_writes_trigger(o)); + + rocksdb_options_set_level0_stop_writes_trigger(o, 8); + CheckCondition(8 == rocksdb_options_get_level0_stop_writes_trigger(o)); + + 
rocksdb_options_set_target_file_size_base(o, 256); + CheckCondition(256 == rocksdb_options_get_target_file_size_base(o)); + + rocksdb_options_set_target_file_size_multiplier(o, 3); + CheckCondition(3 == rocksdb_options_get_target_file_size_multiplier(o)); + + rocksdb_options_set_max_bytes_for_level_base(o, 1024); + CheckCondition(1024 == rocksdb_options_get_max_bytes_for_level_base(o)); + + rocksdb_options_set_level_compaction_dynamic_level_bytes(o, 1); + CheckCondition(1 == + rocksdb_options_get_level_compaction_dynamic_level_bytes(o)); + + rocksdb_options_set_max_bytes_for_level_multiplier(o, 2.0); + CheckCondition(2.0 == + rocksdb_options_get_max_bytes_for_level_multiplier(o)); + + rocksdb_options_set_skip_stats_update_on_db_open(o, 1); + CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o)); + + rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(o, 1); + CheckCondition( + 1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(o)); + + rocksdb_options_set_max_write_buffer_number(o, 97); + CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(o)); + + rocksdb_options_set_min_write_buffer_number_to_merge(o, 23); + CheckCondition(23 == + rocksdb_options_get_min_write_buffer_number_to_merge(o)); + + rocksdb_options_set_max_write_buffer_number_to_maintain(o, 64); + CheckCondition(64 == + rocksdb_options_get_max_write_buffer_number_to_maintain(o)); + + rocksdb_options_set_max_write_buffer_size_to_maintain(o, 50000); + CheckCondition(50000 == + rocksdb_options_get_max_write_buffer_size_to_maintain(o)); + + rocksdb_options_set_enable_pipelined_write(o, 1); + CheckCondition(1 == rocksdb_options_get_enable_pipelined_write(o)); + + rocksdb_options_set_unordered_write(o, 1); + CheckCondition(1 == rocksdb_options_get_unordered_write(o)); + + rocksdb_options_set_max_subcompactions(o, 123456); + CheckCondition(123456 == rocksdb_options_get_max_subcompactions(o)); + + rocksdb_options_set_max_background_jobs(o, 2); + CheckCondition(2 == rocksdb_options_get_max_background_jobs(o)); + + rocksdb_options_set_max_background_compactions(o, 3); + CheckCondition(3 == rocksdb_options_get_max_background_compactions(o)); + + rocksdb_options_set_base_background_compactions(o, 4); + CheckCondition(4 == rocksdb_options_get_base_background_compactions(o)); + + rocksdb_options_set_max_background_flushes(o, 5); + CheckCondition(5 == rocksdb_options_get_max_background_flushes(o)); + + rocksdb_options_set_max_log_file_size(o, 6); + CheckCondition(6 == rocksdb_options_get_max_log_file_size(o)); + + rocksdb_options_set_log_file_time_to_roll(o, 7); + CheckCondition(7 == rocksdb_options_get_log_file_time_to_roll(o)); + + rocksdb_options_set_keep_log_file_num(o, 8); + CheckCondition(8 == rocksdb_options_get_keep_log_file_num(o)); + + rocksdb_options_set_recycle_log_file_num(o, 9); + CheckCondition(9 == rocksdb_options_get_recycle_log_file_num(o)); + + rocksdb_options_set_soft_rate_limit(o, 2.0); + CheckCondition(2.0 == rocksdb_options_get_soft_rate_limit(o)); + + rocksdb_options_set_hard_rate_limit(o, 4.0); + CheckCondition(4.0 == rocksdb_options_get_hard_rate_limit(o)); + + rocksdb_options_set_soft_pending_compaction_bytes_limit(o, 10); + CheckCondition(10 == + rocksdb_options_get_soft_pending_compaction_bytes_limit(o)); + + rocksdb_options_set_hard_pending_compaction_bytes_limit(o, 11); + CheckCondition(11 == + rocksdb_options_get_hard_pending_compaction_bytes_limit(o)); + + rocksdb_options_set_rate_limit_delay_max_milliseconds(o, 1); + CheckCondition(1 == + 
rocksdb_options_get_rate_limit_delay_max_milliseconds(o)); + + rocksdb_options_set_max_manifest_file_size(o, 12); + CheckCondition(12 == rocksdb_options_get_max_manifest_file_size(o)); + + rocksdb_options_set_table_cache_numshardbits(o, 13); + CheckCondition(13 == rocksdb_options_get_table_cache_numshardbits(o)); + + rocksdb_options_set_arena_block_size(o, 14); + CheckCondition(14 == rocksdb_options_get_arena_block_size(o)); + + rocksdb_options_set_use_fsync(o, 1); + CheckCondition(1 == rocksdb_options_get_use_fsync(o)); + + rocksdb_options_set_WAL_ttl_seconds(o, 15); + CheckCondition(15 == rocksdb_options_get_WAL_ttl_seconds(o)); + + rocksdb_options_set_WAL_size_limit_MB(o, 16); + CheckCondition(16 == rocksdb_options_get_WAL_size_limit_MB(o)); + + rocksdb_options_set_manifest_preallocation_size(o, 17); + CheckCondition(17 == rocksdb_options_get_manifest_preallocation_size(o)); + + rocksdb_options_set_allow_mmap_reads(o, 1); + CheckCondition(1 == rocksdb_options_get_allow_mmap_reads(o)); + + rocksdb_options_set_allow_mmap_writes(o, 1); + CheckCondition(1 == rocksdb_options_get_allow_mmap_writes(o)); + + rocksdb_options_set_use_direct_reads(o, 1); + CheckCondition(1 == rocksdb_options_get_use_direct_reads(o)); + + rocksdb_options_set_use_direct_io_for_flush_and_compaction(o, 1); + CheckCondition( + 1 == rocksdb_options_get_use_direct_io_for_flush_and_compaction(o)); + + rocksdb_options_set_is_fd_close_on_exec(o, 1); + CheckCondition(1 == rocksdb_options_get_is_fd_close_on_exec(o)); + + rocksdb_options_set_skip_log_error_on_recovery(o, 1); + CheckCondition(1 == rocksdb_options_get_skip_log_error_on_recovery(o)); + + rocksdb_options_set_stats_dump_period_sec(o, 18); + CheckCondition(18 == rocksdb_options_get_stats_dump_period_sec(o)); + + rocksdb_options_set_stats_persist_period_sec(o, 5); + CheckCondition(5 == rocksdb_options_get_stats_persist_period_sec(o)); + + rocksdb_options_set_advise_random_on_open(o, 1); + CheckCondition(1 == rocksdb_options_get_advise_random_on_open(o)); + + rocksdb_options_set_access_hint_on_compaction_start(o, 3); + CheckCondition(3 == rocksdb_options_get_access_hint_on_compaction_start(o)); + + rocksdb_options_set_use_adaptive_mutex(o, 1); + CheckCondition(1 == rocksdb_options_get_use_adaptive_mutex(o)); + + rocksdb_options_set_bytes_per_sync(o, 19); + CheckCondition(19 == rocksdb_options_get_bytes_per_sync(o)); + + rocksdb_options_set_wal_bytes_per_sync(o, 20); + CheckCondition(20 == rocksdb_options_get_wal_bytes_per_sync(o)); + + rocksdb_options_set_writable_file_max_buffer_size(o, 21); + CheckCondition(21 == rocksdb_options_get_writable_file_max_buffer_size(o)); + + rocksdb_options_set_allow_concurrent_memtable_write(o, 1); + CheckCondition(1 == rocksdb_options_get_allow_concurrent_memtable_write(o)); + + rocksdb_options_set_enable_write_thread_adaptive_yield(o, 1); + CheckCondition(1 == + rocksdb_options_get_enable_write_thread_adaptive_yield(o)); + + rocksdb_options_set_max_sequential_skip_in_iterations(o, 22); + CheckCondition(22 == + rocksdb_options_get_max_sequential_skip_in_iterations(o)); + + rocksdb_options_set_disable_auto_compactions(o, 1); + CheckCondition(1 == rocksdb_options_get_disable_auto_compactions(o)); + + rocksdb_options_set_optimize_filters_for_hits(o, 1); + CheckCondition(1 == rocksdb_options_get_optimize_filters_for_hits(o)); + + rocksdb_options_set_delete_obsolete_files_period_micros(o, 23); + CheckCondition(23 == + rocksdb_options_get_delete_obsolete_files_period_micros(o)); + + rocksdb_options_set_memtable_prefix_bloom_size_ratio(o, 
2.0); + CheckCondition(2.0 == + rocksdb_options_get_memtable_prefix_bloom_size_ratio(o)); + + rocksdb_options_set_max_compaction_bytes(o, 24); + CheckCondition(24 == rocksdb_options_get_max_compaction_bytes(o)); + + rocksdb_options_set_memtable_huge_page_size(o, 25); + CheckCondition(25 == rocksdb_options_get_memtable_huge_page_size(o)); + + rocksdb_options_set_max_successive_merges(o, 26); + CheckCondition(26 == rocksdb_options_get_max_successive_merges(o)); + + rocksdb_options_set_bloom_locality(o, 27); + CheckCondition(27 == rocksdb_options_get_bloom_locality(o)); + + rocksdb_options_set_inplace_update_support(o, 1); + CheckCondition(1 == rocksdb_options_get_inplace_update_support(o)); + + rocksdb_options_set_inplace_update_num_locks(o, 28); + CheckCondition(28 == rocksdb_options_get_inplace_update_num_locks(o)); + + rocksdb_options_set_report_bg_io_stats(o, 1); + CheckCondition(1 == rocksdb_options_get_report_bg_io_stats(o)); + + rocksdb_options_set_wal_recovery_mode(o, 2); + CheckCondition(2 == rocksdb_options_get_wal_recovery_mode(o)); + + rocksdb_options_set_compression(o, 5); + CheckCondition(5 == rocksdb_options_get_compression(o)); + + rocksdb_options_set_bottommost_compression(o, 4); + CheckCondition(4 == rocksdb_options_get_bottommost_compression(o)); + + rocksdb_options_set_compaction_style(o, 2); + CheckCondition(2 == rocksdb_options_get_compaction_style(o)); + + rocksdb_options_set_atomic_flush(o, 1); + CheckCondition(1 == rocksdb_options_get_atomic_flush(o)); + + rocksdb_options_set_manual_wal_flush(o, 1); + CheckCondition(1 == rocksdb_options_get_manual_wal_flush(o)); + + /* Blob Options */ + rocksdb_options_set_enable_blob_files(o, 1); + CheckCondition(1 == rocksdb_options_get_enable_blob_files(o)); + + rocksdb_options_set_min_blob_size(o, 29); + CheckCondition(29 == rocksdb_options_get_min_blob_size(o)); + + rocksdb_options_set_blob_file_size(o, 30); + CheckCondition(30 == rocksdb_options_get_blob_file_size(o)); + + rocksdb_options_set_blob_compression_type(o, 4); + CheckCondition(4 == rocksdb_options_get_blob_compression_type(o)); + + rocksdb_options_set_enable_blob_gc(o, 1); + CheckCondition(1 == rocksdb_options_get_enable_blob_gc(o)); + + rocksdb_options_set_blob_gc_age_cutoff(o, 0.5); + CheckCondition(0.5 == rocksdb_options_get_blob_gc_age_cutoff(o)); + + rocksdb_options_set_blob_gc_force_threshold(o, 0.75); + CheckCondition(0.75 == rocksdb_options_get_blob_gc_force_threshold(o)); + + rocksdb_options_set_blob_compaction_readahead_size(o, 262144); + CheckCondition(262144 == + rocksdb_options_get_blob_compaction_readahead_size(o)); + + // Create a copy that should be equal to the original. 
+ rocksdb_options_t* copy; + copy = rocksdb_options_create_copy(o); + + CheckCondition(1 == rocksdb_options_get_allow_ingest_behind(copy)); + CheckCondition(10 == rocksdb_options_get_compaction_readahead_size(copy)); + CheckCondition(1 == rocksdb_options_get_create_if_missing(copy)); + CheckCondition(1 == + rocksdb_options_get_create_missing_column_families(copy)); + CheckCondition(1 == rocksdb_options_get_error_if_exists(copy)); + CheckCondition(1 == rocksdb_options_get_paranoid_checks(copy)); + CheckCondition(3 == rocksdb_options_get_info_log_level(copy)); + CheckCondition(100 == rocksdb_options_get_write_buffer_size(copy)); + CheckCondition(1000 == rocksdb_options_get_db_write_buffer_size(copy)); + CheckCondition(21 == rocksdb_options_get_max_open_files(copy)); + CheckCondition(5 == rocksdb_options_get_max_file_opening_threads(copy)); + CheckCondition(400 == rocksdb_options_get_max_total_wal_size(copy)); + CheckCondition(7 == rocksdb_options_get_num_levels(copy)); + CheckCondition( + 4 == rocksdb_options_get_level0_file_num_compaction_trigger(copy)); + CheckCondition(6 == + rocksdb_options_get_level0_slowdown_writes_trigger(copy)); + CheckCondition(8 == rocksdb_options_get_level0_stop_writes_trigger(copy)); + CheckCondition(256 == rocksdb_options_get_target_file_size_base(copy)); + CheckCondition(3 == rocksdb_options_get_target_file_size_multiplier(copy)); + CheckCondition(1024 == rocksdb_options_get_max_bytes_for_level_base(copy)); + CheckCondition( + 1 == rocksdb_options_get_level_compaction_dynamic_level_bytes(copy)); + CheckCondition(2.0 == + rocksdb_options_get_max_bytes_for_level_multiplier(copy)); + CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(copy)); + CheckCondition( + 1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(copy)); + CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(copy)); + CheckCondition(23 == + rocksdb_options_get_min_write_buffer_number_to_merge(copy)); + CheckCondition( + 64 == rocksdb_options_get_max_write_buffer_number_to_maintain(copy)); + CheckCondition(50000 == + rocksdb_options_get_max_write_buffer_size_to_maintain(copy)); + CheckCondition(1 == rocksdb_options_get_enable_pipelined_write(copy)); + CheckCondition(1 == rocksdb_options_get_unordered_write(copy)); + CheckCondition(123456 == rocksdb_options_get_max_subcompactions(copy)); + CheckCondition(2 == rocksdb_options_get_max_background_jobs(copy)); + CheckCondition(3 == rocksdb_options_get_max_background_compactions(copy)); + CheckCondition(4 == rocksdb_options_get_base_background_compactions(copy)); + CheckCondition(5 == rocksdb_options_get_max_background_flushes(copy)); + CheckCondition(6 == rocksdb_options_get_max_log_file_size(copy)); + CheckCondition(7 == rocksdb_options_get_log_file_time_to_roll(copy)); + CheckCondition(8 == rocksdb_options_get_keep_log_file_num(copy)); + CheckCondition(9 == rocksdb_options_get_recycle_log_file_num(copy)); + CheckCondition(2.0 == rocksdb_options_get_soft_rate_limit(copy)); + CheckCondition(4.0 == rocksdb_options_get_hard_rate_limit(copy)); + CheckCondition( + 10 == rocksdb_options_get_soft_pending_compaction_bytes_limit(copy)); + CheckCondition( + 11 == rocksdb_options_get_hard_pending_compaction_bytes_limit(copy)); + CheckCondition(1 == + rocksdb_options_get_rate_limit_delay_max_milliseconds(copy)); + CheckCondition(12 == rocksdb_options_get_max_manifest_file_size(copy)); + CheckCondition(13 == rocksdb_options_get_table_cache_numshardbits(copy)); + CheckCondition(14 == 
rocksdb_options_get_arena_block_size(copy)); + CheckCondition(1 == rocksdb_options_get_use_fsync(copy)); + CheckCondition(15 == rocksdb_options_get_WAL_ttl_seconds(copy)); + CheckCondition(16 == rocksdb_options_get_WAL_size_limit_MB(copy)); + CheckCondition(17 == rocksdb_options_get_manifest_preallocation_size(copy)); + CheckCondition(1 == rocksdb_options_get_allow_mmap_reads(copy)); + CheckCondition(1 == rocksdb_options_get_allow_mmap_writes(copy)); + CheckCondition(1 == rocksdb_options_get_use_direct_reads(copy)); + CheckCondition( + 1 == rocksdb_options_get_use_direct_io_for_flush_and_compaction(copy)); + CheckCondition(1 == rocksdb_options_get_is_fd_close_on_exec(copy)); + CheckCondition(1 == rocksdb_options_get_skip_log_error_on_recovery(copy)); + CheckCondition(18 == rocksdb_options_get_stats_dump_period_sec(copy)); + CheckCondition(5 == rocksdb_options_get_stats_persist_period_sec(copy)); + CheckCondition(1 == rocksdb_options_get_advise_random_on_open(copy)); + CheckCondition(3 == + rocksdb_options_get_access_hint_on_compaction_start(copy)); + CheckCondition(1 == rocksdb_options_get_use_adaptive_mutex(copy)); + CheckCondition(19 == rocksdb_options_get_bytes_per_sync(copy)); + CheckCondition(20 == rocksdb_options_get_wal_bytes_per_sync(copy)); + CheckCondition(21 == + rocksdb_options_get_writable_file_max_buffer_size(copy)); + CheckCondition(1 == + rocksdb_options_get_allow_concurrent_memtable_write(copy)); + CheckCondition( + 1 == rocksdb_options_get_enable_write_thread_adaptive_yield(copy)); + CheckCondition(22 == + rocksdb_options_get_max_sequential_skip_in_iterations(copy)); + CheckCondition(1 == rocksdb_options_get_disable_auto_compactions(copy)); + CheckCondition(1 == rocksdb_options_get_optimize_filters_for_hits(copy)); + CheckCondition( + 23 == rocksdb_options_get_delete_obsolete_files_period_micros(copy)); + CheckCondition(2.0 == + rocksdb_options_get_memtable_prefix_bloom_size_ratio(copy)); + CheckCondition(24 == rocksdb_options_get_max_compaction_bytes(copy)); + CheckCondition(25 == rocksdb_options_get_memtable_huge_page_size(copy)); + CheckCondition(26 == rocksdb_options_get_max_successive_merges(copy)); + CheckCondition(27 == rocksdb_options_get_bloom_locality(copy)); + CheckCondition(1 == rocksdb_options_get_inplace_update_support(copy)); + CheckCondition(28 == rocksdb_options_get_inplace_update_num_locks(copy)); + CheckCondition(1 == rocksdb_options_get_report_bg_io_stats(copy)); + CheckCondition(2 == rocksdb_options_get_wal_recovery_mode(copy)); + CheckCondition(5 == rocksdb_options_get_compression(copy)); + CheckCondition(4 == rocksdb_options_get_bottommost_compression(copy)); + CheckCondition(2 == rocksdb_options_get_compaction_style(copy)); + CheckCondition(1 == rocksdb_options_get_atomic_flush(copy)); + + // Copies should be independent. 
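Before the independence checks that follow: rocksdb_options_create_copy (verified above) is expected to produce a detached copy, so mutating the copy must leave the source handle untouched. A minimal sketch of that contract from user code:

    #include "rocksdb/c.h"

    /* Mutating a copied options handle does not affect the original. */
    static void copy_is_detached(void) {
      rocksdb_options_t* base = rocksdb_options_create();
      rocksdb_options_set_max_open_files(base, 21);
      rocksdb_options_t* copy = rocksdb_options_create_copy(base);
      rocksdb_options_set_max_open_files(copy, 42);
      /* base still reports 21; copy reports 42 */
      rocksdb_options_destroy(copy);
      rocksdb_options_destroy(base);
    }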
+ rocksdb_options_set_allow_ingest_behind(copy, 0); + CheckCondition(0 == rocksdb_options_get_allow_ingest_behind(copy)); + CheckCondition(1 == rocksdb_options_get_allow_ingest_behind(o)); + + rocksdb_options_compaction_readahead_size(copy, 20); + CheckCondition(20 == rocksdb_options_get_compaction_readahead_size(copy)); + CheckCondition(10 == rocksdb_options_get_compaction_readahead_size(o)); + + rocksdb_options_set_create_if_missing(copy, 0); + CheckCondition(0 == rocksdb_options_get_create_if_missing(copy)); + CheckCondition(1 == rocksdb_options_get_create_if_missing(o)); + + rocksdb_options_set_create_missing_column_families(copy, 0); + CheckCondition(0 == + rocksdb_options_get_create_missing_column_families(copy)); + CheckCondition(1 == rocksdb_options_get_create_missing_column_families(o)); + + rocksdb_options_set_error_if_exists(copy, 0); + CheckCondition(0 == rocksdb_options_get_error_if_exists(copy)); + CheckCondition(1 == rocksdb_options_get_error_if_exists(o)); + + rocksdb_options_set_paranoid_checks(copy, 0); + CheckCondition(0 == rocksdb_options_get_paranoid_checks(copy)); + CheckCondition(1 == rocksdb_options_get_paranoid_checks(o)); + + rocksdb_options_set_info_log_level(copy, 2); + CheckCondition(2 == rocksdb_options_get_info_log_level(copy)); + CheckCondition(3 == rocksdb_options_get_info_log_level(o)); + + rocksdb_options_set_write_buffer_size(copy, 200); + CheckCondition(200 == rocksdb_options_get_write_buffer_size(copy)); + CheckCondition(100 == rocksdb_options_get_write_buffer_size(o)); + + rocksdb_options_set_db_write_buffer_size(copy, 2000); + CheckCondition(2000 == rocksdb_options_get_db_write_buffer_size(copy)); + CheckCondition(1000 == rocksdb_options_get_db_write_buffer_size(o)); + + rocksdb_options_set_max_open_files(copy, 42); + CheckCondition(42 == rocksdb_options_get_max_open_files(copy)); + CheckCondition(21 == rocksdb_options_get_max_open_files(o)); + + rocksdb_options_set_max_file_opening_threads(copy, 3); + CheckCondition(3 == rocksdb_options_get_max_file_opening_threads(copy)); + CheckCondition(5 == rocksdb_options_get_max_file_opening_threads(o)); + + rocksdb_options_set_max_total_wal_size(copy, 4000); + CheckCondition(4000 == rocksdb_options_get_max_total_wal_size(copy)); + CheckCondition(400 == rocksdb_options_get_max_total_wal_size(o)); + + rocksdb_options_set_num_levels(copy, 6); + CheckCondition(6 == rocksdb_options_get_num_levels(copy)); + CheckCondition(7 == rocksdb_options_get_num_levels(o)); + + rocksdb_options_set_level0_file_num_compaction_trigger(copy, 14); + CheckCondition( + 14 == rocksdb_options_get_level0_file_num_compaction_trigger(copy)); + CheckCondition(4 == + rocksdb_options_get_level0_file_num_compaction_trigger(o)); + + rocksdb_options_set_level0_slowdown_writes_trigger(copy, 61); + CheckCondition(61 == + rocksdb_options_get_level0_slowdown_writes_trigger(copy)); + CheckCondition(6 == rocksdb_options_get_level0_slowdown_writes_trigger(o)); + + rocksdb_options_set_level0_stop_writes_trigger(copy, 17); + CheckCondition(17 == rocksdb_options_get_level0_stop_writes_trigger(copy)); + CheckCondition(8 == rocksdb_options_get_level0_stop_writes_trigger(o)); + + rocksdb_options_set_target_file_size_base(copy, 128); + CheckCondition(128 == rocksdb_options_get_target_file_size_base(copy)); + CheckCondition(256 == rocksdb_options_get_target_file_size_base(o)); + + rocksdb_options_set_target_file_size_multiplier(copy, 13); + CheckCondition(13 == rocksdb_options_get_target_file_size_multiplier(copy)); + CheckCondition(3 == 
rocksdb_options_get_target_file_size_multiplier(o)); + + rocksdb_options_set_max_bytes_for_level_base(copy, 900); + CheckCondition(900 == rocksdb_options_get_max_bytes_for_level_base(copy)); + CheckCondition(1024 == rocksdb_options_get_max_bytes_for_level_base(o)); + + rocksdb_options_set_level_compaction_dynamic_level_bytes(copy, 0); + CheckCondition( + 0 == rocksdb_options_get_level_compaction_dynamic_level_bytes(copy)); + CheckCondition(1 == + rocksdb_options_get_level_compaction_dynamic_level_bytes(o)); + + rocksdb_options_set_max_bytes_for_level_multiplier(copy, 8.0); + CheckCondition(8.0 == + rocksdb_options_get_max_bytes_for_level_multiplier(copy)); + CheckCondition(2.0 == + rocksdb_options_get_max_bytes_for_level_multiplier(o)); + + rocksdb_options_set_skip_stats_update_on_db_open(copy, 0); + CheckCondition(0 == rocksdb_options_get_skip_stats_update_on_db_open(copy)); + CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o)); + + rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(copy, 0); + CheckCondition( + 0 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(copy)); + CheckCondition( + 1 == rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(o)); + + rocksdb_options_set_max_write_buffer_number(copy, 2000); + CheckCondition(2000 == rocksdb_options_get_max_write_buffer_number(copy)); + CheckCondition(97 == rocksdb_options_get_max_write_buffer_number(o)); + + rocksdb_options_set_min_write_buffer_number_to_merge(copy, 146); + CheckCondition(146 == + rocksdb_options_get_min_write_buffer_number_to_merge(copy)); + CheckCondition(23 == + rocksdb_options_get_min_write_buffer_number_to_merge(o)); + + rocksdb_options_set_max_write_buffer_number_to_maintain(copy, 128); + CheckCondition( + 128 == rocksdb_options_get_max_write_buffer_number_to_maintain(copy)); + CheckCondition(64 == + rocksdb_options_get_max_write_buffer_number_to_maintain(o)); + + rocksdb_options_set_max_write_buffer_size_to_maintain(copy, 9000); + CheckCondition(9000 == + rocksdb_options_get_max_write_buffer_size_to_maintain(copy)); + CheckCondition(50000 == + rocksdb_options_get_max_write_buffer_size_to_maintain(o)); + + rocksdb_options_set_enable_pipelined_write(copy, 0); + CheckCondition(0 == rocksdb_options_get_enable_pipelined_write(copy)); + CheckCondition(1 == rocksdb_options_get_enable_pipelined_write(o)); + + rocksdb_options_set_unordered_write(copy, 0); + CheckCondition(0 == rocksdb_options_get_unordered_write(copy)); + CheckCondition(1 == rocksdb_options_get_unordered_write(o)); + + rocksdb_options_set_max_subcompactions(copy, 90001); + CheckCondition(90001 == rocksdb_options_get_max_subcompactions(copy)); + CheckCondition(123456 == rocksdb_options_get_max_subcompactions(o)); + + rocksdb_options_set_max_background_jobs(copy, 12); + CheckCondition(12 == rocksdb_options_get_max_background_jobs(copy)); + CheckCondition(2 == rocksdb_options_get_max_background_jobs(o)); + + rocksdb_options_set_max_background_compactions(copy, 13); + CheckCondition(13 == rocksdb_options_get_max_background_compactions(copy)); + CheckCondition(3 == rocksdb_options_get_max_background_compactions(o)); + + rocksdb_options_set_base_background_compactions(copy, 14); + CheckCondition(14 == rocksdb_options_get_base_background_compactions(copy)); + CheckCondition(4 == rocksdb_options_get_base_background_compactions(o)); + + rocksdb_options_set_max_background_flushes(copy, 15); + CheckCondition(15 == rocksdb_options_get_max_background_flushes(copy)); + CheckCondition(5 == 
rocksdb_options_get_max_background_flushes(o)); + + rocksdb_options_set_max_log_file_size(copy, 16); + CheckCondition(16 == rocksdb_options_get_max_log_file_size(copy)); + CheckCondition(6 == rocksdb_options_get_max_log_file_size(o)); + + rocksdb_options_set_log_file_time_to_roll(copy, 17); + CheckCondition(17 == rocksdb_options_get_log_file_time_to_roll(copy)); + CheckCondition(7 == rocksdb_options_get_log_file_time_to_roll(o)); + + rocksdb_options_set_keep_log_file_num(copy, 18); + CheckCondition(18 == rocksdb_options_get_keep_log_file_num(copy)); + CheckCondition(8 == rocksdb_options_get_keep_log_file_num(o)); + + rocksdb_options_set_recycle_log_file_num(copy, 19); + CheckCondition(19 == rocksdb_options_get_recycle_log_file_num(copy)); + CheckCondition(9 == rocksdb_options_get_recycle_log_file_num(o)); + + rocksdb_options_set_soft_rate_limit(copy, 4.0); + CheckCondition(4.0 == rocksdb_options_get_soft_rate_limit(copy)); + CheckCondition(2.0 == rocksdb_options_get_soft_rate_limit(o)); + + rocksdb_options_set_hard_rate_limit(copy, 2.0); + CheckCondition(2.0 == rocksdb_options_get_hard_rate_limit(copy)); + CheckCondition(4.0 == rocksdb_options_get_hard_rate_limit(o)); + + rocksdb_options_set_soft_pending_compaction_bytes_limit(copy, 110); + CheckCondition( + 110 == rocksdb_options_get_soft_pending_compaction_bytes_limit(copy)); + CheckCondition(10 == + rocksdb_options_get_soft_pending_compaction_bytes_limit(o)); + + rocksdb_options_set_hard_pending_compaction_bytes_limit(copy, 111); + CheckCondition( + 111 == rocksdb_options_get_hard_pending_compaction_bytes_limit(copy)); + CheckCondition(11 == + rocksdb_options_get_hard_pending_compaction_bytes_limit(o)); + + rocksdb_options_set_rate_limit_delay_max_milliseconds(copy, 0); + CheckCondition(0 == + rocksdb_options_get_rate_limit_delay_max_milliseconds(copy)); + CheckCondition(1 == + rocksdb_options_get_rate_limit_delay_max_milliseconds(o)); + + rocksdb_options_set_max_manifest_file_size(copy, 112); + CheckCondition(112 == rocksdb_options_get_max_manifest_file_size(copy)); + CheckCondition(12 == rocksdb_options_get_max_manifest_file_size(o)); + + rocksdb_options_set_table_cache_numshardbits(copy, 113); + CheckCondition(113 == rocksdb_options_get_table_cache_numshardbits(copy)); + CheckCondition(13 == rocksdb_options_get_table_cache_numshardbits(o)); + + rocksdb_options_set_arena_block_size(copy, 114); + CheckCondition(114 == rocksdb_options_get_arena_block_size(copy)); + CheckCondition(14 == rocksdb_options_get_arena_block_size(o)); + + rocksdb_options_set_use_fsync(copy, 0); + CheckCondition(0 == rocksdb_options_get_use_fsync(copy)); + CheckCondition(1 == rocksdb_options_get_use_fsync(o)); + + rocksdb_options_set_WAL_ttl_seconds(copy, 115); + CheckCondition(115 == rocksdb_options_get_WAL_ttl_seconds(copy)); + CheckCondition(15 == rocksdb_options_get_WAL_ttl_seconds(o)); + + rocksdb_options_set_WAL_size_limit_MB(copy, 116); + CheckCondition(116 == rocksdb_options_get_WAL_size_limit_MB(copy)); + CheckCondition(16 == rocksdb_options_get_WAL_size_limit_MB(o)); + + rocksdb_options_set_manifest_preallocation_size(copy, 117); + CheckCondition(117 == + rocksdb_options_get_manifest_preallocation_size(copy)); + CheckCondition(17 == rocksdb_options_get_manifest_preallocation_size(o)); + + rocksdb_options_set_allow_mmap_reads(copy, 0); + CheckCondition(0 == rocksdb_options_get_allow_mmap_reads(copy)); + CheckCondition(1 == rocksdb_options_get_allow_mmap_reads(o)); + + rocksdb_options_set_allow_mmap_writes(copy, 0); + CheckCondition(0 == 
rocksdb_options_get_allow_mmap_writes(copy)); + CheckCondition(1 == rocksdb_options_get_allow_mmap_writes(o)); + + rocksdb_options_set_use_direct_reads(copy, 0); + CheckCondition(0 == rocksdb_options_get_use_direct_reads(copy)); + CheckCondition(1 == rocksdb_options_get_use_direct_reads(o)); + + rocksdb_options_set_use_direct_io_for_flush_and_compaction(copy, 0); + CheckCondition( + 0 == rocksdb_options_get_use_direct_io_for_flush_and_compaction(copy)); + CheckCondition( + 1 == rocksdb_options_get_use_direct_io_for_flush_and_compaction(o)); + + rocksdb_options_set_is_fd_close_on_exec(copy, 0); + CheckCondition(0 == rocksdb_options_get_is_fd_close_on_exec(copy)); + CheckCondition(1 == rocksdb_options_get_is_fd_close_on_exec(o)); + + rocksdb_options_set_skip_log_error_on_recovery(copy, 0); + CheckCondition(0 == rocksdb_options_get_skip_log_error_on_recovery(copy)); + CheckCondition(1 == rocksdb_options_get_skip_log_error_on_recovery(o)); + + rocksdb_options_set_stats_dump_period_sec(copy, 218); + CheckCondition(218 == rocksdb_options_get_stats_dump_period_sec(copy)); + CheckCondition(18 == rocksdb_options_get_stats_dump_period_sec(o)); + + rocksdb_options_set_stats_persist_period_sec(copy, 600); + CheckCondition(600 == rocksdb_options_get_stats_persist_period_sec(copy)); + CheckCondition(5 == rocksdb_options_get_stats_persist_period_sec(o)); + + rocksdb_options_set_advise_random_on_open(copy, 0); + CheckCondition(0 == rocksdb_options_get_advise_random_on_open(copy)); + CheckCondition(1 == rocksdb_options_get_advise_random_on_open(o)); + + rocksdb_options_set_access_hint_on_compaction_start(copy, 2); + CheckCondition(2 == + rocksdb_options_get_access_hint_on_compaction_start(copy)); + CheckCondition(3 == rocksdb_options_get_access_hint_on_compaction_start(o)); + + rocksdb_options_set_use_adaptive_mutex(copy, 0); + CheckCondition(0 == rocksdb_options_get_use_adaptive_mutex(copy)); + CheckCondition(1 == rocksdb_options_get_use_adaptive_mutex(o)); + + rocksdb_options_set_bytes_per_sync(copy, 219); + CheckCondition(219 == rocksdb_options_get_bytes_per_sync(copy)); + CheckCondition(19 == rocksdb_options_get_bytes_per_sync(o)); + + rocksdb_options_set_wal_bytes_per_sync(copy, 120); + CheckCondition(120 == rocksdb_options_get_wal_bytes_per_sync(copy)); + CheckCondition(20 == rocksdb_options_get_wal_bytes_per_sync(o)); + + rocksdb_options_set_writable_file_max_buffer_size(copy, 121); + CheckCondition(121 == + rocksdb_options_get_writable_file_max_buffer_size(copy)); + CheckCondition(21 == rocksdb_options_get_writable_file_max_buffer_size(o)); + + rocksdb_options_set_allow_concurrent_memtable_write(copy, 0); + CheckCondition(0 == + rocksdb_options_get_allow_concurrent_memtable_write(copy)); + CheckCondition(1 == rocksdb_options_get_allow_concurrent_memtable_write(o)); + + rocksdb_options_set_enable_write_thread_adaptive_yield(copy, 0); + CheckCondition( + 0 == rocksdb_options_get_enable_write_thread_adaptive_yield(copy)); + CheckCondition(1 == + rocksdb_options_get_enable_write_thread_adaptive_yield(o)); + + rocksdb_options_set_max_sequential_skip_in_iterations(copy, 122); + CheckCondition(122 == + rocksdb_options_get_max_sequential_skip_in_iterations(copy)); + CheckCondition(22 == + rocksdb_options_get_max_sequential_skip_in_iterations(o)); + + rocksdb_options_set_disable_auto_compactions(copy, 0); + CheckCondition(0 == rocksdb_options_get_disable_auto_compactions(copy)); + CheckCondition(1 == rocksdb_options_get_disable_auto_compactions(o)); + + rocksdb_options_set_optimize_filters_for_hits(copy, 
0); + CheckCondition(0 == rocksdb_options_get_optimize_filters_for_hits(copy)); + CheckCondition(1 == rocksdb_options_get_optimize_filters_for_hits(o)); + + rocksdb_options_set_delete_obsolete_files_period_micros(copy, 123); + CheckCondition( + 123 == rocksdb_options_get_delete_obsolete_files_period_micros(copy)); + CheckCondition(23 == + rocksdb_options_get_delete_obsolete_files_period_micros(o)); + + rocksdb_options_set_memtable_prefix_bloom_size_ratio(copy, 4.0); + CheckCondition(4.0 == + rocksdb_options_get_memtable_prefix_bloom_size_ratio(copy)); + CheckCondition(2.0 == + rocksdb_options_get_memtable_prefix_bloom_size_ratio(o)); + + rocksdb_options_set_max_compaction_bytes(copy, 124); + CheckCondition(124 == rocksdb_options_get_max_compaction_bytes(copy)); + CheckCondition(24 == rocksdb_options_get_max_compaction_bytes(o)); + + rocksdb_options_set_memtable_huge_page_size(copy, 125); + CheckCondition(125 == rocksdb_options_get_memtable_huge_page_size(copy)); + CheckCondition(25 == rocksdb_options_get_memtable_huge_page_size(o)); + + rocksdb_options_set_max_successive_merges(copy, 126); + CheckCondition(126 == rocksdb_options_get_max_successive_merges(copy)); + CheckCondition(26 == rocksdb_options_get_max_successive_merges(o)); + + rocksdb_options_set_bloom_locality(copy, 127); + CheckCondition(127 == rocksdb_options_get_bloom_locality(copy)); + CheckCondition(27 == rocksdb_options_get_bloom_locality(o)); + + rocksdb_options_set_inplace_update_support(copy, 0); + CheckCondition(0 == rocksdb_options_get_inplace_update_support(copy)); + CheckCondition(1 == rocksdb_options_get_inplace_update_support(o)); + + rocksdb_options_set_inplace_update_num_locks(copy, 128); + CheckCondition(128 == rocksdb_options_get_inplace_update_num_locks(copy)); + CheckCondition(28 == rocksdb_options_get_inplace_update_num_locks(o)); + + rocksdb_options_set_report_bg_io_stats(copy, 0); + CheckCondition(0 == rocksdb_options_get_report_bg_io_stats(copy)); + CheckCondition(1 == rocksdb_options_get_report_bg_io_stats(o)); + + rocksdb_options_set_wal_recovery_mode(copy, 1); + CheckCondition(1 == rocksdb_options_get_wal_recovery_mode(copy)); + CheckCondition(2 == rocksdb_options_get_wal_recovery_mode(o)); + + rocksdb_options_set_compression(copy, 4); + CheckCondition(4 == rocksdb_options_get_compression(copy)); + CheckCondition(5 == rocksdb_options_get_compression(o)); + + rocksdb_options_set_bottommost_compression(copy, 3); + CheckCondition(3 == rocksdb_options_get_bottommost_compression(copy)); + CheckCondition(4 == rocksdb_options_get_bottommost_compression(o)); + + rocksdb_options_set_compaction_style(copy, 1); + CheckCondition(1 == rocksdb_options_get_compaction_style(copy)); + CheckCondition(2 == rocksdb_options_get_compaction_style(o)); + + rocksdb_options_set_atomic_flush(copy, 0); + CheckCondition(0 == rocksdb_options_get_atomic_flush(copy)); + CheckCondition(1 == rocksdb_options_get_atomic_flush(o)); + + rocksdb_options_destroy(copy); + rocksdb_options_destroy(o); + } + + StartPhase("read_options"); + { + rocksdb_readoptions_t* ro; + ro = rocksdb_readoptions_create(); + + rocksdb_readoptions_set_verify_checksums(ro, 1); + CheckCondition(1 == rocksdb_readoptions_get_verify_checksums(ro)); + + rocksdb_readoptions_set_fill_cache(ro, 1); + CheckCondition(1 == rocksdb_readoptions_get_fill_cache(ro)); + + rocksdb_readoptions_set_read_tier(ro, 2); + CheckCondition(2 == rocksdb_readoptions_get_read_tier(ro)); + + rocksdb_readoptions_set_tailing(ro, 1); + CheckCondition(1 == rocksdb_readoptions_get_tailing(ro)); + + 
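The read-options knobs tested in this phase map onto common client patterns. For instance, a one-off scan that avoids polluting the block cache could look like the following sketch (all calls are long-standing C API functions; db is a hypothetical, already-open handle):

    #include "rocksdb/c.h"

    static void scan_without_cache_pollution(rocksdb_t* db) {
      rocksdb_readoptions_t* ro = rocksdb_readoptions_create();
      rocksdb_readoptions_set_fill_cache(ro, 0);  /* do not cache scanned blocks */
      rocksdb_readoptions_set_readahead_size(ro, 2 * 1024 * 1024);
      rocksdb_iterator_t* it = rocksdb_create_iterator(db, ro);
      for (rocksdb_iter_seek_to_first(it); rocksdb_iter_valid(it);
           rocksdb_iter_next(it)) {
        /* consume rocksdb_iter_key() / rocksdb_iter_value() here */
      }
      rocksdb_iter_destroy(it);
      rocksdb_readoptions_destroy(ro);
    }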
rocksdb_readoptions_set_readahead_size(ro, 100); + CheckCondition(100 == rocksdb_readoptions_get_readahead_size(ro)); + + rocksdb_readoptions_set_prefix_same_as_start(ro, 1); + CheckCondition(1 == rocksdb_readoptions_get_prefix_same_as_start(ro)); + + rocksdb_readoptions_set_pin_data(ro, 1); + CheckCondition(1 == rocksdb_readoptions_get_pin_data(ro)); + + rocksdb_readoptions_set_total_order_seek(ro, 1); + CheckCondition(1 == rocksdb_readoptions_get_total_order_seek(ro)); + + rocksdb_readoptions_set_max_skippable_internal_keys(ro, 200); + CheckCondition(200 == + rocksdb_readoptions_get_max_skippable_internal_keys(ro)); + + rocksdb_readoptions_set_background_purge_on_iterator_cleanup(ro, 1); + CheckCondition( + 1 == rocksdb_readoptions_get_background_purge_on_iterator_cleanup(ro)); + + rocksdb_readoptions_set_ignore_range_deletions(ro, 1); + CheckCondition(1 == rocksdb_readoptions_get_ignore_range_deletions(ro)); + + rocksdb_readoptions_set_deadline(ro, 300); + CheckCondition(300 == rocksdb_readoptions_get_deadline(ro)); + + rocksdb_readoptions_set_io_timeout(ro, 400); + CheckCondition(400 == rocksdb_readoptions_get_io_timeout(ro)); + + rocksdb_readoptions_destroy(ro); + } + + StartPhase("write_options"); + { + rocksdb_writeoptions_t* wo; + wo = rocksdb_writeoptions_create(); + + rocksdb_writeoptions_set_sync(wo, 1); + CheckCondition(1 == rocksdb_writeoptions_get_sync(wo)); + + rocksdb_writeoptions_disable_WAL(wo, 1); + CheckCondition(1 == rocksdb_writeoptions_get_disable_WAL(wo)); + + rocksdb_writeoptions_set_ignore_missing_column_families(wo, 1); + CheckCondition(1 == + rocksdb_writeoptions_get_ignore_missing_column_families(wo)); + + rocksdb_writeoptions_set_no_slowdown(wo, 1); + CheckCondition(1 == rocksdb_writeoptions_get_no_slowdown(wo)); + + rocksdb_writeoptions_set_low_pri(wo, 1); + CheckCondition(1 == rocksdb_writeoptions_get_low_pri(wo)); + + rocksdb_writeoptions_set_memtable_insert_hint_per_batch(wo, 1); + CheckCondition(1 == + rocksdb_writeoptions_get_memtable_insert_hint_per_batch(wo)); + + rocksdb_writeoptions_destroy(wo); + } + + StartPhase("compact_options"); + { + rocksdb_compactoptions_t* co; + co = rocksdb_compactoptions_create(); + + rocksdb_compactoptions_set_exclusive_manual_compaction(co, 1); + CheckCondition(1 == + rocksdb_compactoptions_get_exclusive_manual_compaction(co)); + + rocksdb_compactoptions_set_bottommost_level_compaction(co, 1); + CheckCondition(1 == + rocksdb_compactoptions_get_bottommost_level_compaction(co)); + + rocksdb_compactoptions_set_change_level(co, 1); + CheckCondition(1 == rocksdb_compactoptions_get_change_level(co)); + + rocksdb_compactoptions_set_target_level(co, 1); + CheckCondition(1 == rocksdb_compactoptions_get_target_level(co)); + + rocksdb_compactoptions_destroy(co); + } + + StartPhase("flush_options"); + { + rocksdb_flushoptions_t* fo; + fo = rocksdb_flushoptions_create(); + + rocksdb_flushoptions_set_wait(fo, 1); + CheckCondition(1 == rocksdb_flushoptions_get_wait(fo)); + + rocksdb_flushoptions_destroy(fo); + } + + StartPhase("cache_options"); + { + rocksdb_cache_t* co; + co = rocksdb_cache_create_lru(100); + CheckCondition(100 == rocksdb_cache_get_capacity(co)); + + rocksdb_cache_set_capacity(co, 200); + CheckCondition(200 == rocksdb_cache_get_capacity(co)); + + rocksdb_cache_destroy(co); + } + + StartPhase("jemalloc_nodump_allocator"); + { + rocksdb_memory_allocator_t* allocator; + allocator = rocksdb_jemalloc_nodump_allocator_create(&err); + if (err != NULL) { + // not supported on all platforms, allow unsupported error + const 
char* ni = "Not implemented: "; + size_t ni_len = strlen(ni); + size_t err_len = strlen(err); + + CheckCondition(err_len >= ni_len); + CheckCondition(memcmp(ni, err, ni_len) == 0); + Free(&err); + } else { + rocksdb_cache_t* co; + rocksdb_lru_cache_options_t* copts; + + copts = rocksdb_lru_cache_options_create(); + + rocksdb_lru_cache_options_set_capacity(copts, 100); + rocksdb_lru_cache_options_set_memory_allocator(copts, allocator); + + co = rocksdb_cache_create_lru_opts(copts); + CheckCondition(100 == rocksdb_cache_get_capacity(co)); + + rocksdb_cache_destroy(co); + rocksdb_lru_cache_options_destroy(copts); + } + rocksdb_memory_allocator_destroy(allocator); + } + + StartPhase("env"); + { + rocksdb_env_t* e; + e = rocksdb_create_default_env(); + + rocksdb_env_set_background_threads(e, 10); + CheckCondition(10 == rocksdb_env_get_background_threads(e)); + + rocksdb_env_set_high_priority_background_threads(e, 20); + CheckCondition(20 == rocksdb_env_get_high_priority_background_threads(e)); + + rocksdb_env_set_low_priority_background_threads(e, 30); + CheckCondition(30 == rocksdb_env_get_low_priority_background_threads(e)); + + rocksdb_env_set_bottom_priority_background_threads(e, 40); + CheckCondition(40 == rocksdb_env_get_bottom_priority_background_threads(e)); + + rocksdb_env_destroy(e); + } + + StartPhase("universal_compaction_options"); + { + rocksdb_universal_compaction_options_t* uco; + uco = rocksdb_universal_compaction_options_create(); + + rocksdb_universal_compaction_options_set_size_ratio(uco, 5); + CheckCondition(5 == + rocksdb_universal_compaction_options_get_size_ratio(uco)); + + rocksdb_universal_compaction_options_set_min_merge_width(uco, 15); + CheckCondition( + 15 == rocksdb_universal_compaction_options_get_min_merge_width(uco)); + + rocksdb_universal_compaction_options_set_max_merge_width(uco, 25); + CheckCondition( + 25 == rocksdb_universal_compaction_options_get_max_merge_width(uco)); + + rocksdb_universal_compaction_options_set_max_size_amplification_percent(uco, + 35); + CheckCondition( + 35 == + rocksdb_universal_compaction_options_get_max_size_amplification_percent( + uco)); + + rocksdb_universal_compaction_options_set_compression_size_percent(uco, 45); + CheckCondition( + 45 == + rocksdb_universal_compaction_options_get_compression_size_percent(uco)); + + rocksdb_universal_compaction_options_set_stop_style(uco, 1); + CheckCondition(1 == + rocksdb_universal_compaction_options_get_stop_style(uco)); + + rocksdb_universal_compaction_options_destroy(uco); + } + + StartPhase("fifo_compaction_options"); + { + rocksdb_fifo_compaction_options_t* fco; + fco = rocksdb_fifo_compaction_options_create(); + + rocksdb_fifo_compaction_options_set_max_table_files_size(fco, 100000); + CheckCondition( + 100000 == + rocksdb_fifo_compaction_options_get_max_table_files_size(fco)); + + rocksdb_fifo_compaction_options_destroy(fco); + } + + StartPhase("backupable_db_option"); + { + rocksdb_backupable_db_options_t* bdo; + bdo = rocksdb_backupable_db_options_create("path"); + + rocksdb_backupable_db_options_set_share_table_files(bdo, 1); + CheckCondition(1 == + rocksdb_backupable_db_options_get_share_table_files(bdo)); + + rocksdb_backupable_db_options_set_sync(bdo, 1); + CheckCondition(1 == rocksdb_backupable_db_options_get_sync(bdo)); + + rocksdb_backupable_db_options_set_destroy_old_data(bdo, 1); + CheckCondition(1 == + rocksdb_backupable_db_options_get_destroy_old_data(bdo)); + + rocksdb_backupable_db_options_set_backup_log_files(bdo, 1); + CheckCondition(1 == + 
rocksdb_backupable_db_options_get_backup_log_files(bdo)); + + rocksdb_backupable_db_options_set_backup_rate_limit(bdo, 123); + CheckCondition(123 == + rocksdb_backupable_db_options_get_backup_rate_limit(bdo)); + + rocksdb_backupable_db_options_set_restore_rate_limit(bdo, 37); + CheckCondition(37 == + rocksdb_backupable_db_options_get_restore_rate_limit(bdo)); + + rocksdb_backupable_db_options_set_max_background_operations(bdo, 20); + CheckCondition( + 20 == rocksdb_backupable_db_options_get_max_background_operations(bdo)); + + rocksdb_backupable_db_options_set_callback_trigger_interval_size(bdo, 9000); + CheckCondition( + 9000 == + rocksdb_backupable_db_options_get_callback_trigger_interval_size(bdo)); + + rocksdb_backupable_db_options_set_max_valid_backups_to_open(bdo, 40); + CheckCondition( + 40 == rocksdb_backupable_db_options_get_max_valid_backups_to_open(bdo)); + + rocksdb_backupable_db_options_set_share_files_with_checksum_naming(bdo, 2); + CheckCondition( + 2 == rocksdb_backupable_db_options_get_share_files_with_checksum_naming( + bdo)); + + rocksdb_backupable_db_options_destroy(bdo); + } + + StartPhase("compression_options"); + { + rocksdb_options_t* co; + co = rocksdb_options_create(); + + rocksdb_options_set_compression_options_zstd_max_train_bytes(co, 100); + CheckCondition( + 100 == + rocksdb_options_get_compression_options_zstd_max_train_bytes(co)); + + rocksdb_options_set_compression_options_parallel_threads(co, 2); + CheckCondition( + 2 == rocksdb_options_get_compression_options_parallel_threads(co)); + + rocksdb_options_set_compression_options_max_dict_buffer_bytes(co, 200); + CheckCondition( + 200 == + rocksdb_options_get_compression_options_max_dict_buffer_bytes(co)); + + rocksdb_options_destroy(co); + } + StartPhase("iterate_upper_bound"); { // Create new empty database @@ -1840,6 +2984,54 @@ CheckNoError(err); } + StartPhase("filter_with_prefix_seek"); + { + rocksdb_close(db); + rocksdb_destroy_db(options, dbname, &err); + CheckNoError(err); + + rocksdb_options_set_prefix_extractor( + options, rocksdb_slicetransform_create_fixed_prefix(1)); + rocksdb_filterpolicy_t* filter_policy = + rocksdb_filterpolicy_create_bloom_full(8.0); + rocksdb_block_based_options_set_filter_policy(table_options, filter_policy); + rocksdb_options_set_block_based_table_factory(options, table_options); + + db = rocksdb_open(options, dbname, &err); + CheckNoError(err); + + int i; + for (i = 0; i < 10; ++i) { + char key = '0' + (char)i; + rocksdb_put(db, woptions, &key, 1, "", 1, &err); + CheckNoError(err); + } + + // Flush to generate an L0 so that filter will be used later. 
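The filter_with_prefix_seek phase above installs a one-byte fixed-prefix extractor plus a full Bloom filter, and below it flushes so that the filter is consulted from an L0 file. For comparison, a prefix-confined scan over the same setup would use prefix_same_as_start, as in this sketch (db stands for the handle opened in that phase):

    #include "rocksdb/c.h"

    /* Visit only keys sharing the one-byte prefix "0". */
    static void iterate_single_prefix(rocksdb_t* db) {
      rocksdb_readoptions_t* ro = rocksdb_readoptions_create();
      rocksdb_readoptions_set_prefix_same_as_start(ro, 1);
      rocksdb_iterator_t* it = rocksdb_create_iterator(db, ro);
      for (rocksdb_iter_seek(it, "0", 1); rocksdb_iter_valid(it);
           rocksdb_iter_next(it)) {
        /* keys outside the prefix are not returned */
      }
      rocksdb_iter_destroy(it);
      rocksdb_readoptions_destroy(ro);
    }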
+ rocksdb_flushoptions_t* flush_options = rocksdb_flushoptions_create(); + rocksdb_flushoptions_set_wait(flush_options, 1); + rocksdb_flush(db, flush_options, &err); + rocksdb_flushoptions_destroy(flush_options); + CheckNoError(err); + + rocksdb_readoptions_t* ropts = rocksdb_readoptions_create(); + rocksdb_iterator_t* iter = rocksdb_create_iterator(db, ropts); + + rocksdb_iter_seek(iter, "0", 1); + int cnt = 0; + while (rocksdb_iter_valid(iter)) { + ++cnt; + rocksdb_iter_next(iter); + } + CheckCondition(10 == cnt); + + rocksdb_iter_destroy(iter); + rocksdb_readoptions_destroy(ropts); + } + + StartPhase("cancel_all_background_work"); + rocksdb_cancel_all_background_work(db, 1); + StartPhase("cleanup"); rocksdb_close(db); rocksdb_options_destroy(options); @@ -1858,7 +3050,7 @@ #else -int main() { +int main(void) { fprintf(stderr, "SKIPPED\n"); return 0; } diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/column_family.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/column_family.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family.cc 2025-05-19 16:14:27.000000000 +0000 @@ -12,9 +12,11 @@ #include <algorithm> #include <cinttypes> #include <limits> +#include <sstream> #include <string> #include <vector> +#include "db/blob/blob_file_cache.h" #include "db/compaction/compaction_picker.h" #include "db/compaction/compaction_picker_fifo.h" #include "db/compaction/compaction_picker_level.h" @@ -27,13 +29,15 @@ #include "db/version_set.h" #include "db/write_controller.h" #include "file/sst_file_manager_impl.h" -#include "memtable/hash_skiplist_rep.h" +#include "logging/logging.h" #include "monitoring/thread_status_util.h" #include "options/options_helper.h" #include "port/port.h" -#include "table/block_based/block_based_table_factory.h" +#include "rocksdb/convenience.h" +#include "rocksdb/table.h" #include "table/merging_iterator.h" #include "util/autovector.h" +#include "util/cast_util.h" #include "util/compression.h" namespace ROCKSDB_NAMESPACE { @@ -71,11 +75,6 @@ bool defer_purge = db_->immutable_db_options().avoid_unnecessary_blocking_io; db_->PurgeObsoleteFiles(job_context, defer_purge); - if (defer_purge) { - mutex_->Lock(); - db_->SchedulePurge(); - mutex_->Unlock(); - } } job_context.Clean(); } @@ -105,8 +104,9 @@ void GetIntTblPropCollectorFactory( const ImmutableCFOptions& ioptions, - std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* - int_tbl_prop_collector_factories) { + IntTblPropCollectorFactories* int_tbl_prop_collector_factories) { + assert(int_tbl_prop_collector_factories); + auto& collector_factories = ioptions.table_properties_collector_factories; for (size_t i = 0; i < ioptions.table_properties_collector_factories.size(); ++i) { @@ -147,6 +147,16 @@ "should be nonzero if we're using zstd's dictionary generator."); } } + + if (!CompressionTypeSupported(cf_options.blob_compression_type)) { + std::ostringstream oss; + oss << "The specified blob compression type " + << CompressionTypeToString(cf_options.blob_compression_type) + << " is not available."; + + return Status::InvalidArgument(oss.str()); + } + return Status::OK(); } @@ -188,7 +198,7 @@ namespace { const uint64_t kDefaultTtl = 0xfffffffffffffffe; const uint64_t kDefaultPeriodicCompSecs = 0xfffffffffffffffe; -}; // namespace +} // namespace ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, const ColumnFamilyOptions& src) { @@ -196,11 +206,13 @@ size_t clamp_max = std::conditional< sizeof(size_t) == 4, std::integral_constant<size_t, 0xffffffff>, std::integral_constant<uint64_t, 64ull << 30>>::type::value; -
ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max); + ClipToRange(&result.write_buffer_size, (static_cast<size_t>(64)) << 10, + clamp_max); // if user sets arena_block_size, we trust user to use this value. Otherwise, // calculate a proper value from writer_buffer_size; if (result.arena_block_size <= 0) { - result.arena_block_size = result.write_buffer_size / 8; + result.arena_block_size = + std::min(size_t{1024 * 1024}, result.write_buffer_size / 8); // Align up to 4k const size_t align = 4 * 1024; @@ -269,7 +281,7 @@ } if (result.level0_file_num_compaction_trigger == 0) { - ROCKS_LOG_WARN(db_options.info_log.get(), + ROCKS_LOG_WARN(db_options.logger, "level0_file_num_compaction_trigger cannot be 0"); result.level0_file_num_compaction_trigger = 1; } @@ -278,7 +290,7 @@ result.level0_slowdown_writes_trigger || result.level0_slowdown_writes_trigger < result.level0_file_num_compaction_trigger) { - ROCKS_LOG_WARN(db_options.info_log.get(), + ROCKS_LOG_WARN(db_options.logger, "This condition must be satisfied: " "level0_stop_writes_trigger(%d) >= " "level0_slowdown_writes_trigger(%d) >= " @@ -295,7 +307,7 @@ result.level0_slowdown_writes_trigger) { result.level0_stop_writes_trigger = result.level0_slowdown_writes_trigger; } - ROCKS_LOG_WARN(db_options.info_log.get(), + ROCKS_LOG_WARN(db_options.logger, "Adjust the value to " "level0_stop_writes_trigger(%d)" "level0_slowdown_writes_trigger(%d)" @@ -322,7 +334,9 @@ // was not used) auto sfm = static_cast<SstFileManagerImpl*>(db_options.sst_file_manager.get()); for (size_t i = 0; i < result.cf_paths.size(); i++) { - DeleteScheduler::CleanupDirectory(db_options.env, sfm, result.cf_paths[i].path); + DeleteScheduler::CleanupDirectory(db_options.env, sfm, + result.cf_paths[i].path) + .PermitUncheckedError(); } #endif @@ -331,12 +345,18 @@ } if (result.level_compaction_dynamic_level_bytes) { - if (result.compaction_style != kCompactionStyleLevel || - result.cf_paths.size() > 1U) { - // 1. level_compaction_dynamic_level_bytes only makes sense for - // level-based compaction. - // 2. we don't yet know how to make both of this feature and multiple - // DB path work. + if (result.compaction_style != kCompactionStyleLevel) { + ROCKS_LOG_WARN(db_options.info_log.get(), + "level_compaction_dynamic_level_bytes only makes sense" + "for level-based compaction"); + result.level_compaction_dynamic_level_bytes = false; + } else if (result.cf_paths.size() > 1U) { + // we don't yet know how to make both of this feature and multiple + // DB path work. + ROCKS_LOG_WARN(db_options.info_log.get(), + "multiple cf_paths/db_paths and" + "level_compaction_dynamic_level_bytes" + "can't be used together"); result.level_compaction_dynamic_level_bytes = false; } } @@ -345,8 +365,8 @@ result.max_compaction_bytes = result.target_file_size_base * 25; } - bool is_block_based_table = - (result.table_factory->Name() == BlockBasedTableFactory().Name()); + bool is_block_based_table = (result.table_factory->IsInstanceOf( + TableFactory::kBlockBasedTableName())); const uint64_t kAdjustedTtl = 30 * 24 * 60 * 60; if (result.ttl == kDefaultTtl) { @@ -427,6 +447,9 @@ void SuperVersion::Cleanup() { assert(refs.load(std::memory_order_relaxed) == 0); + // Since this SuperVersion object is being deleted, + // decrement reference to the immutable MemtableList + // this SV object was pointing to.
imm->Unref(&to_delete); MemTable* m = mem->Unref(); if (m != nullptr) { @@ -436,9 +459,7 @@ to_delete.push_back(m); } current->Unref(); - if (cfd->Unref()) { - delete cfd; - } + cfd->UnrefAndTryDelete(); } void SuperVersion::Init(ColumnFamilyData* new_cfd, MemTable* new_mem, @@ -456,10 +477,10 @@ namespace { void SuperVersionUnrefHandle(void* ptr) { - // UnrefHandle is called when a thread exists or a ThreadLocalPtr gets - // destroyed. When former happens, the thread shouldn't see kSVInUse. - // When latter happens, we are in ~ColumnFamilyData(), no get should happen as - // well. + // UnrefHandle is called when a thread exits or a ThreadLocalPtr gets + // destroyed. When the former happens, the thread shouldn't see kSVInUse. + // When the latter happens, only super_version_ holds a reference + // to ColumnFamilyData, so no further queries are possible. SuperVersion* sv = static_cast<SuperVersion*>(ptr); bool was_last_ref __attribute__((__unused__)); was_last_ref = sv->Unref(); @@ -471,12 +492,25 @@ } } // anonymous namespace +std::vector<std::string> ColumnFamilyData::GetDbPaths() const { + std::vector<std::string> paths; + paths.reserve(ioptions_.cf_paths.size()); + for (const DbPath& db_path : ioptions_.cf_paths) { + paths.emplace_back(db_path.path); + } + return paths; +} + +const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId = port::kMaxUint32; + ColumnFamilyData::ColumnFamilyData( uint32_t id, const std::string& name, Version* _dummy_versions, Cache* _table_cache, WriteBufferManager* write_buffer_manager, const ColumnFamilyOptions& cf_options, const ImmutableDBOptions& db_options, - const FileOptions& file_options, ColumnFamilySet* column_family_set, - BlockCacheTracer* const block_cache_tracer) + const FileOptions* file_options, ColumnFamilySet* column_family_set, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr<IOTracer>& io_tracer, + const std::string& db_session_id) : id_(id), name_(name), dummy_versions_(_dummy_versions), @@ -507,7 +541,23 @@ queued_for_compaction_(false), prev_compaction_needed_bytes_(0), allow_2pc_(db_options.allow_2pc), - last_memtable_id_(0) { + last_memtable_id_(0), + db_paths_registered_(false) { + if (id_ != kDummyColumnFamilyDataId) { + // TODO(cc): RegisterDbPaths can be expensive, considering moving it + // outside of this constructor which might be called with db mutex held. + // TODO(cc): considering using ioptions_.fs, currently some tests rely on + // EnvWrapper, that's the main reason why we use env here. + Status s = ioptions_.env->RegisterDbPaths(GetDbPaths()); + if (s.ok()) { + db_paths_registered_ = true; + } else { + ROCKS_LOG_ERROR( + ioptions_.logger, + "Failed to register data paths of column family (id: %d, name: %s)", + id_, name_.c_str()); + } + } Ref(); // Convert user defined table properties collector factories to internal ones. @@ -516,9 +566,14 @@ // if _dummy_versions is nullptr, then this is a dummy column family.
if (_dummy_versions != nullptr) { internal_stats_.reset( - new InternalStats(ioptions_.num_levels, db_options.env, this)); + new InternalStats(ioptions_.num_levels, ioptions_.clock, this)); table_cache_.reset(new TableCache(ioptions_, file_options, _table_cache, - block_cache_tracer)); + block_cache_tracer, io_tracer, + db_session_id)); + blob_file_cache_.reset( + new BlobFileCache(_table_cache, ioptions(), soptions(), id_, + internal_stats_->GetBlobFileReadHist(), io_tracer)); + if (ioptions_.compaction_style == kCompactionStyleLevel) { compaction_picker_.reset( new LevelCompactionPicker(ioptions_, &internal_comparator_)); @@ -532,13 +587,13 @@ } else if (ioptions_.compaction_style == kCompactionStyleNone) { compaction_picker_.reset(new NullCompactionPicker( ioptions_, &internal_comparator_)); - ROCKS_LOG_WARN(ioptions_.info_log, + ROCKS_LOG_WARN(ioptions_.logger, "Column family %s does not use any background compaction. " "Compactions can only be done via CompactFiles\n", GetName().c_str()); #endif // !ROCKSDB_LITE } else { - ROCKS_LOG_ERROR(ioptions_.info_log, + ROCKS_LOG_ERROR(ioptions_.logger, "Unable to recognize the specified compaction style %d. " "Column family %s will use kCompactionStyleLevel.\n", ioptions_.compaction_style, GetName().c_str()); @@ -547,12 +602,12 @@ } if (column_family_set_->NumberOfColumnFamilies() < 10) { - ROCKS_LOG_INFO(ioptions_.info_log, + ROCKS_LOG_INFO(ioptions_.logger, "--------------- Options for column family [%s]:\n", name.c_str()); - initial_cf_options_.Dump(ioptions_.info_log); + initial_cf_options_.Dump(ioptions_.logger); } else { - ROCKS_LOG_INFO(ioptions_.info_log, "\t(skipping printing options)\n"); + ROCKS_LOG_INFO(ioptions_.logger, "\t(skipping printing options)\n"); } } @@ -587,7 +642,7 @@ if (dummy_versions_ != nullptr) { // List must be empty - assert(dummy_versions_->TEST_Next() == dummy_versions_); + assert(dummy_versions_->Next() == dummy_versions_); bool deleted __attribute__((__unused__)); deleted = dummy_versions_->Unref(); assert(deleted); @@ -601,6 +656,18 @@ for (MemTable* m : to_delete) { delete m; } + + if (db_paths_registered_) { + // TODO(cc): considering using ioptions_.fs, currently some tests rely on + // EnvWrapper, that's the main reason why we use env here. + Status s = ioptions_.env->UnregisterDbPaths(GetDbPaths()); + if (!s.ok()) { + ROCKS_LOG_ERROR( + ioptions_.logger, + "Failed to unregister data paths of column family (id: %d, name: %s)", + id_, name_.c_str()); + } + } } bool ColumnFamilyData::UnrefAndTryDelete() { @@ -617,14 +684,13 @@ // Only the super_version_ holds me SuperVersion* sv = super_version_; super_version_ = nullptr; - // Release SuperVersion reference kept in ThreadLocalPtr. - // This must be done outside of mutex_ since unref handler can lock mutex. - sv->db_mutex->Unlock(); + + // Release SuperVersion references kept in ThreadLocalPtr. 
local_sv_.reset(); - sv->db_mutex->Lock(); if (sv->Unref()) { - // May delete this ColumnFamilyData after calling Cleanup() + // Note: sv will delete this ColumnFamilyData during Cleanup() + assert(sv->cfd == this); sv->Cleanup(); delete sv; return true; @@ -651,9 +717,7 @@ auto current_log = GetLogNumber(); if (allow_2pc_) { - autovector empty_list; - auto imm_prep_log = - imm()->PrecomputeMinLogContainingPrepSection(empty_list); + auto imm_prep_log = imm()->PrecomputeMinLogContainingPrepSection(); auto mem_prep_log = mem()->GetMinLogContainingPrepSection(); if (imm_prep_log > 0 && imm_prep_log < current_log) { @@ -775,7 +839,8 @@ ColumnFamilyData::GetWriteStallConditionAndCause( int num_unflushed_memtables, int num_l0_files, uint64_t num_compaction_needed_bytes, - const MutableCFOptions& mutable_cf_options) { + const MutableCFOptions& mutable_cf_options, + const ImmutableCFOptions& immutable_cf_options) { if (num_unflushed_memtables >= mutable_cf_options.max_write_buffer_number) { return {WriteStallCondition::kStopped, WriteStallCause::kMemtableLimit}; } else if (!mutable_cf_options.disable_auto_compactions && @@ -789,7 +854,9 @@ WriteStallCause::kPendingCompactionBytes}; } else if (mutable_cf_options.max_write_buffer_number > 3 && num_unflushed_memtables >= - mutable_cf_options.max_write_buffer_number - 1) { + mutable_cf_options.max_write_buffer_number - 1 && + num_unflushed_memtables - 1 >= + immutable_cf_options.min_write_buffer_number_to_merge) { return {WriteStallCondition::kDelayed, WriteStallCause::kMemtableLimit}; } else if (!mutable_cf_options.disable_auto_compactions && mutable_cf_options.level0_slowdown_writes_trigger >= 0 && @@ -817,7 +884,8 @@ auto write_stall_condition_and_cause = GetWriteStallConditionAndCause( imm()->NumNotFlushed(), vstorage->l0_delay_trigger_count(), - vstorage->estimated_compaction_needed_bytes(), mutable_cf_options); + vstorage->estimated_compaction_needed_bytes(), mutable_cf_options, + *ioptions()); write_stall_condition = write_stall_condition_and_cause.first; auto write_stall_cause = write_stall_condition_and_cause.second; @@ -829,7 +897,7 @@ write_controller_token_ = write_controller->GetStopToken(); internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_STOPS, 1); ROCKS_LOG_WARN( - ioptions_.info_log, + ioptions_.logger, "[%s] Stopping writes because we have %d immutable memtables " "(waiting for flush), max_write_buffer_number is set to %d", name_.c_str(), imm()->NumNotFlushed(), @@ -842,7 +910,7 @@ internal_stats_->AddCFStats( InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_STOPS, 1); } - ROCKS_LOG_WARN(ioptions_.info_log, + ROCKS_LOG_WARN(ioptions_.logger, "[%s] Stopping writes because we have %d level-0 files", name_.c_str(), vstorage->l0_delay_trigger_count()); } else if (write_stall_condition == WriteStallCondition::kStopped && @@ -851,7 +919,7 @@ internal_stats_->AddCFStats( InternalStats::PENDING_COMPACTION_BYTES_LIMIT_STOPS, 1); ROCKS_LOG_WARN( - ioptions_.info_log, + ioptions_.logger, "[%s] Stopping writes because of estimated pending compaction " "bytes %" PRIu64, name_.c_str(), compaction_needed_bytes); @@ -863,7 +931,7 @@ mutable_cf_options.disable_auto_compactions); internal_stats_->AddCFStats(InternalStats::MEMTABLE_LIMIT_SLOWDOWNS, 1); ROCKS_LOG_WARN( - ioptions_.info_log, + ioptions_.logger, "[%s] Stalling writes because we have %d immutable memtables " "(waiting for flush), max_write_buffer_number is set to %d " "rate %" PRIu64, @@ -885,7 +953,7 @@ internal_stats_->AddCFStats( InternalStats::LOCKED_L0_FILE_COUNT_LIMIT_SLOWDOWNS, 
1); } - ROCKS_LOG_WARN(ioptions_.info_log, + ROCKS_LOG_WARN(ioptions_.logger, "[%s] Stalling writes because we have %d level-0 files " "rate %" PRIu64, name_.c_str(), vstorage->l0_delay_trigger_count(), @@ -910,7 +978,7 @@ internal_stats_->AddCFStats( InternalStats::PENDING_COMPACTION_BYTES_LIMIT_SLOWDOWNS, 1); ROCKS_LOG_WARN( - ioptions_.info_log, + ioptions_.logger, "[%s] Stalling writes because of estimated pending compaction " "bytes %" PRIu64 " rate %" PRIu64, name_.c_str(), vstorage->estimated_compaction_needed_bytes(), @@ -924,7 +992,7 @@ write_controller_token_ = write_controller->GetCompactionPressureToken(); ROCKS_LOG_INFO( - ioptions_.info_log, + ioptions_.logger, "[%s] Increasing compaction threads because we have %d level-0 " "files ", name_.c_str(), vstorage->l0_delay_trigger_count()); @@ -938,7 +1006,7 @@ write_controller->GetCompactionPressureToken(); if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) { ROCKS_LOG_INFO( - ioptions_.info_log, + ioptions_.logger, "[%s] Increasing compaction threads because of estimated pending " "compaction " "bytes %" PRIu64, @@ -983,6 +1051,10 @@ return VersionSet::GetTotalSstFilesSize(dummy_versions_); } +uint64_t ColumnFamilyData::GetTotalBlobFileSize() const { + return VersionSet::GetTotalBlobFileSize(dummy_versions_); +} + uint64_t ColumnFamilyData::GetLiveSstFilesSize() const { return current_->GetSstFilesSize(); } @@ -1003,17 +1075,19 @@ } bool ColumnFamilyData::NeedsCompaction() const { - return compaction_picker_->NeedsCompaction(current_->storage_info()); + return !mutable_cf_options_.disable_auto_compactions && + compaction_picker_->NeedsCompaction(current_->storage_info()); } Compaction* ColumnFamilyData::PickCompaction( - const MutableCFOptions& mutable_options, LogBuffer* log_buffer) { + const MutableCFOptions& mutable_options, + const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) { SequenceNumber earliest_mem_seqno = std::min(mem_->GetEarliestSequenceNumber(), imm_.current()->GetEarliestSequenceNumber(false)); auto* result = compaction_picker_->PickCompaction( - GetName(), mutable_options, current_->storage_info(), log_buffer, - earliest_mem_seqno); + GetName(), mutable_options, mutable_db_options, current_->storage_info(), + log_buffer, earliest_mem_seqno); if (result != nullptr) { result->SetInputVersion(current_); } @@ -1029,7 +1103,7 @@ Status ColumnFamilyData::RangesOverlapWithMemtables( const autovector& ranges, SuperVersion* super_version, - bool* overlap) { + bool allow_data_in_errors, bool* overlap) { assert(overlap != nullptr); *overlap = false; // Create an InternalIterator over all unflushed memtables @@ -1048,10 +1122,12 @@ super_version->mem->NewRangeTombstoneIterator(read_opts, read_seq); range_del_agg.AddTombstones( std::unique_ptr(active_range_del_iter)); - super_version->imm->AddRangeTombstoneIterators(read_opts, nullptr /* arena */, - &range_del_agg); - Status status; + status = super_version->imm->AddRangeTombstoneIterators( + read_opts, nullptr /* arena */, &range_del_agg); + // AddRangeTombstoneIterators always return Status::OK. 
+ assert(status.ok()); + for (size_t i = 0; i < ranges.size() && status.ok() && !*overlap; ++i) { auto* vstorage = super_version->current->storage_info(); auto* ucmp = vstorage->InternalComparator()->user_comparator(); @@ -1060,12 +1136,12 @@ memtable_iter->Seek(range_start.Encode()); status = memtable_iter->status(); ParsedInternalKey seek_result; - if (status.ok()) { - if (memtable_iter->Valid() && - !ParseInternalKey(memtable_iter->key(), &seek_result)) { - status = Status::Corruption("DB have corrupted keys"); - } + + if (status.ok() && memtable_iter->Valid()) { + status = ParseInternalKey(memtable_iter->key(), &seek_result, + allow_data_in_errors); } + if (status.ok()) { if (memtable_iter->Valid() && ucmp->Compare(seek_result.user_key, ranges[i].limit) <= 0) { @@ -1083,14 +1159,16 @@ const int ColumnFamilyData::kCompactToBaseLevel = -2; Compaction* ColumnFamilyData::CompactRange( - const MutableCFOptions& mutable_cf_options, int input_level, + const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* conflict, uint64_t max_file_num_to_ignore) { auto* result = compaction_picker_->CompactRange( - GetName(), mutable_cf_options, current_->storage_info(), input_level, - output_level, compact_range_options, begin, end, compaction_end, conflict, + GetName(), mutable_cf_options, mutable_db_options, + current_->storage_info(), input_level, output_level, + compact_range_options, begin, end, compaction_end, conflict, max_file_num_to_ignore); if (result != nullptr) { result->SetInputVersion(current_); @@ -1133,11 +1211,11 @@ SuperVersion* sv = static_cast(ptr); if (sv == SuperVersion::kSVObsolete || sv->version_number != super_version_number_.load()) { - RecordTick(ioptions_.statistics, NUMBER_SUPERVERSION_ACQUIRES); + RecordTick(ioptions_.stats, NUMBER_SUPERVERSION_ACQUIRES); SuperVersion* sv_to_delete = nullptr; if (sv && sv->Unref()) { - RecordTick(ioptions_.statistics, NUMBER_SUPERVERSION_CLEANUPS); + RecordTick(ioptions_.stats, NUMBER_SUPERVERSION_CLEANUPS); db->mutex()->Lock(); // NOTE: underlying resources held by superversion (sst files) might // not be released until the next background job. @@ -1181,14 +1259,13 @@ void ColumnFamilyData::InstallSuperVersion( SuperVersionContext* sv_context, InstrumentedMutex* db_mutex) { db_mutex->AssertHeld(); - return InstallSuperVersion(sv_context, db_mutex, mutable_cf_options_); + return InstallSuperVersion(sv_context, mutable_cf_options_); } void ColumnFamilyData::InstallSuperVersion( - SuperVersionContext* sv_context, InstrumentedMutex* db_mutex, + SuperVersionContext* sv_context, const MutableCFOptions& mutable_cf_options) { SuperVersion* new_superversion = sv_context->new_superversion.release(); - new_superversion->db_mutex = db_mutex; new_superversion->mutable_cf_options = mutable_cf_options; new_superversion->Init(this, mem_, imm_.current(), current_); SuperVersion* old_superversion = super_version_; @@ -1260,7 +1337,8 @@ } if (cf_options.ttl > 0 && cf_options.ttl != kDefaultTtl) { - if (cf_options.table_factory->Name() != BlockBasedTableFactory().Name()) { + if (!cf_options.table_factory->IsInstanceOf( + TableFactory::kBlockBasedTableName())) { return Status::NotSupported( "TTL is only supported in Block-Based Table format. 
"); } @@ -1268,30 +1346,53 @@ if (cf_options.periodic_compaction_seconds > 0 && cf_options.periodic_compaction_seconds != kDefaultPeriodicCompSecs) { - if (cf_options.table_factory->Name() != BlockBasedTableFactory().Name()) { + if (!cf_options.table_factory->IsInstanceOf( + TableFactory::kBlockBasedTableName())) { return Status::NotSupported( "Periodic Compaction is only supported in " "Block-Based Table format. "); } } + + if (cf_options.enable_blob_garbage_collection) { + if (cf_options.blob_garbage_collection_age_cutoff < 0.0 || + cf_options.blob_garbage_collection_age_cutoff > 1.0) { + return Status::InvalidArgument( + "The age cutoff for blob garbage collection should be in the range " + "[0.0, 1.0]."); + } + if (cf_options.blob_garbage_collection_force_threshold < 0.0 || + cf_options.blob_garbage_collection_force_threshold > 1.0) { + return Status::InvalidArgument( + "The garbage ratio threshold for forcing blob garbage collection " + "should be in the range [0.0, 1.0]."); + } + } + + if (cf_options.compaction_style == kCompactionStyleFIFO && + db_options.max_open_files != -1 && cf_options.ttl > 0) { + return Status::NotSupported( + "FIFO compaction only supported with max_open_files = -1."); + } + return s; } #ifndef ROCKSDB_LITE Status ColumnFamilyData::SetOptions( - const DBOptions& db_options, + const DBOptions& db_opts, const std::unordered_map& options_map) { - MutableCFOptions new_mutable_cf_options; - Status s = - GetMutableOptionsFromStrings(mutable_cf_options_, options_map, - ioptions_.info_log, &new_mutable_cf_options); + ColumnFamilyOptions cf_opts = + BuildColumnFamilyOptions(initial_cf_options_, mutable_cf_options_); + ConfigOptions config_opts; + config_opts.mutable_options_only = true; + Status s = GetColumnFamilyOptionsFromMap(config_opts, cf_opts, options_map, + &cf_opts); if (s.ok()) { - ColumnFamilyOptions cf_options = - BuildColumnFamilyOptions(initial_cf_options_, new_mutable_cf_options); - s = ValidateOptions(db_options, cf_options); + s = ValidateOptions(db_opts, cf_opts); } if (s.ok()) { - mutable_cf_options_ = new_mutable_cf_options; + mutable_cf_options_ = MutableCFOptions(cf_opts); mutable_cf_options_.RefreshDerivedOptions(ioptions_); } return s; @@ -1321,7 +1422,7 @@ } Status ColumnFamilyData::AddDirectories( - std::map>* created_dirs) { + std::map>* created_dirs) { Status s; assert(created_dirs != nullptr); assert(data_dirs_.empty()); @@ -1329,8 +1430,9 @@ auto existing_dir = created_dirs->find(p.path); if (existing_dir == created_dirs->end()) { - std::unique_ptr path_directory; - s = DBImpl::CreateAndNewDirectory(ioptions_.env, p.path, &path_directory); + std::unique_ptr path_directory; + s = DBImpl::CreateAndNewDirectory(ioptions_.fs.get(), p.path, + &path_directory); if (!s.ok()) { return s; } @@ -1345,7 +1447,7 @@ return s; } -Directory* ColumnFamilyData::GetDataDir(size_t path_id) const { +FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const { if (data_dirs_.empty()) { return nullptr; } @@ -1358,21 +1460,26 @@ const ImmutableDBOptions* db_options, const FileOptions& file_options, Cache* table_cache, - WriteBufferManager* write_buffer_manager, - WriteController* write_controller, - BlockCacheTracer* const block_cache_tracer) + WriteBufferManager* _write_buffer_manager, + WriteController* _write_controller, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, + const std::string& db_session_id) : max_column_family_(0), + file_options_(file_options), dummy_cfd_(new ColumnFamilyData( - 0, "", nullptr, nullptr, 
nullptr, ColumnFamilyOptions(), *db_options, - file_options, nullptr, block_cache_tracer)), + ColumnFamilyData::kDummyColumnFamilyDataId, "", nullptr, nullptr, + nullptr, ColumnFamilyOptions(), *db_options, &file_options_, nullptr, + block_cache_tracer, io_tracer, db_session_id)), default_cfd_cache_(nullptr), db_name_(dbname), db_options_(db_options), - file_options_(file_options), table_cache_(table_cache), - write_buffer_manager_(write_buffer_manager), - write_controller_(write_controller), - block_cache_tracer_(block_cache_tracer) { + write_buffer_manager_(_write_buffer_manager), + write_controller_(_write_controller), + block_cache_tracer_(block_cache_tracer), + io_tracer_(io_tracer), + db_session_id_(db_session_id) { // initialize linked list dummy_cfd_->prev_ = dummy_cfd_; dummy_cfd_->next_ = dummy_cfd_; @@ -1438,7 +1545,8 @@ assert(column_families_.find(name) == column_families_.end()); ColumnFamilyData* new_cfd = new ColumnFamilyData( id, name, dummy_versions, table_cache_, write_buffer_manager_, options, - *db_options_, file_options_, this, block_cache_tracer_); + *db_options_, &file_options_, this, block_cache_tracer_, io_tracer_, + db_session_id_); column_families_.insert({name, id}); column_family_data_.insert({id, new_cfd}); max_column_family_ = std::max(max_column_family_, id); @@ -1454,20 +1562,6 @@ return new_cfd; } -// REQUIRES: DB mutex held -void ColumnFamilySet::FreeDeadColumnFamilies() { - autovector to_delete; - for (auto cfd = dummy_cfd_->next_; cfd != dummy_cfd_; cfd = cfd->next_) { - if (cfd->refs_.load(std::memory_order_relaxed) == 0) { - to_delete.push_back(cfd); - } - } - for (auto cfd : to_delete) { - // this is very rare, so it's not a problem that we do it under a mutex - delete cfd; - } -} - // under a DB mutex AND from a write thread void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) { auto cfd_iter = column_family_data_.find(cfd->GetID()); @@ -1506,7 +1600,7 @@ uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family) { uint32_t column_family_id = 0; if (column_family != nullptr) { - auto cfh = reinterpret_cast(column_family); + auto cfh = static_cast_with_check(column_family); column_family_id = cfh->GetID(); } return column_family_id; diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/column_family.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/column_family.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family.h 2025-05-19 16:14:27.000000000 +0000 @@ -44,6 +44,7 @@ class InstrumentedMutex; class InstrumentedMutexLock; struct SuperVersionContext; +class BlobFileCache; extern const double kIncSlowdownRatio; // This file contains a list of data structures for managing column family @@ -207,8 +208,6 @@ uint64_t version_number; WriteStallCondition write_stall_condition; - InstrumentedMutex* db_mutex; - // should be called outside the mutex SuperVersion() = default; ~SuperVersion(); @@ -252,13 +251,12 @@ extern ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, const ColumnFamilyOptions& src); -// Wrap user defined table proproties collector factories `from cf_options` +// Wrap user defined table properties collector factories `from cf_options` // into internal ones in int_tbl_prop_collector_factories. Add a system internal // one too. 
extern void GetIntTblPropCollectorFactory( const ImmutableCFOptions& ioptions, - std::vector>* - int_tbl_prop_collector_factories); + IntTblPropCollectorFactories* int_tbl_prop_collector_factories); class ColumnFamilySet; @@ -278,17 +276,6 @@ // holding a DB mutex, or as the leader in a write batch group). void Ref() { refs_.fetch_add(1); } - // Unref decreases the reference count, but does not handle deletion - // when the count goes to 0. If this method returns true then the - // caller should delete the instance immediately, or later, by calling - // FreeDeadColumnFamilies(). Unref() can only be called while holding - // a DB mutex, or during single-threaded recovery. - bool Unref() { - int old_refs = refs_.fetch_sub(1); - assert(old_refs > 0); - return old_refs == 1; - } - // UnrefAndTryDelete() decreases the reference count and do free if needed, // return true if this is freed else false, UnrefAndTryDelete() can only // be called while holding a DB mutex, or during single-threaded recovery. @@ -325,7 +312,7 @@ FlushReason GetFlushReason() const { return flush_reason_; } // thread-safe const FileOptions* soptions() const; - const ImmutableCFOptions* ioptions() const { return &ioptions_; } + const ImmutableOptions* ioptions() const { return &ioptions_; } // REQUIRES: DB mutex held // This returns the MutableCFOptions used by current SuperVersion // You should use this API to reference MutableCFOptions most of the time. @@ -359,12 +346,18 @@ MemTableList* imm() { return &imm_; } MemTable* mem() { return mem_; } + + bool IsEmpty() { + return mem()->GetFirstSequenceNumber() == 0 && imm()->NumNotFlushed() == 0; + } + Version* current() { return current_; } Version* dummy_versions() { return dummy_versions_; } void SetCurrent(Version* _current); uint64_t GetNumLiveVersions() const; // REQUIRE: DB mutex held uint64_t GetTotalSstFilesSize() const; // REQUIRE: DB mutex held uint64_t GetLiveSstFilesSize() const; // REQUIRE: DB mutex held + uint64_t GetTotalBlobFileSize() const; // REQUIRE: DB mutex held void SetMemtable(MemTable* new_mem) { uint64_t memtable_id = last_memtable_id_.fetch_add(1) + 1; new_mem->SetID(memtable_id); @@ -381,12 +374,14 @@ SequenceNumber earliest_seq); TableCache* table_cache() const { return table_cache_.get(); } + BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); } // See documentation in compaction_picker.h // REQUIRES: DB mutex held bool NeedsCompaction() const; // REQUIRES: DB mutex held Compaction* PickCompaction(const MutableCFOptions& mutable_options, + const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer); // Check if the passed range overlap with any running compactions. @@ -403,7 +398,8 @@ // // Thread-safe Status RangesOverlapWithMemtables(const autovector& ranges, - SuperVersion* super_version, bool* overlap); + SuperVersion* super_version, + bool allow_data_in_errors, bool* overlap); // A flag to tell a manual compaction is to compact all levels together // instead of a specific level. 
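Note on the API shift visible in the hunks above: RangesOverlapWithMemtables now takes an allow_data_in_errors flag, matching the earlier column_family.cc hunk where ParseInternalKey moved from a bool return to a Status return; the flag controls whether raw key bytes may be echoed into error text. Below is a minimal self-contained sketch of that reporting pattern. The Status type and ParseKeySketch function are illustrative stand-ins, not the real RocksDB API.

    #include <string>
    #include <utility>

    // Stand-in for rocksdb::Status, just enough to make the sketch compile.
    struct Status {
      std::string msg;
      bool ok() const { return msg.empty(); }
      static Status OK() { return {}; }
      static Status Corruption(std::string m) { return {std::move(m)}; }
    };

    // New-style parse: report why the key is bad, and only quote the key
    // bytes when allow_data_in_errors permits it.
    Status ParseKeySketch(const std::string& key, bool allow_data_in_errors) {
      if (key.size() >= 8) {  // stand-in for the real internal-key footer check
        return Status::OK();
      }
      std::string msg = "Internal key too small";
      if (allow_data_in_errors) {
        msg += ", key: " + key;  // surfacing user data is opt-in
      }
      return Status::Corruption(msg);
    }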
@@ -412,6 +408,7 @@ static const int kCompactToBaseLevel; // REQUIRES: DB mutex held Compaction* CompactRange(const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, @@ -428,8 +425,7 @@ return internal_comparator_; } - const std::vector>* - int_tbl_prop_collector_factories() const { + const IntTblPropCollectorFactories* int_tbl_prop_collector_factories() const { return &int_tbl_prop_collector_factories_; } @@ -441,7 +437,7 @@ // Get SuperVersion stored in thread local storage. If it does not exist, // get a reference from a current SuperVersion. SuperVersion* GetThreadLocalSuperVersion(DBImpl* db); - // Try to return SuperVersion back to thread local storage. Retrun true on + // Try to return SuperVersion back to thread local storage. Return true on // success and false on failure. It fails when the thread local storage // contains anything other than SuperVersion::kSVInUse flag. bool ReturnThreadLocalSuperVersion(SuperVersion* sv); @@ -455,7 +451,6 @@ // the clients to allocate SuperVersion outside of mutex. // IMPORTANT: Only call this from DBImpl::InstallSuperVersion() void InstallSuperVersion(SuperVersionContext* sv_context, - InstrumentedMutex* db_mutex, const MutableCFOptions& mutable_cf_options); void InstallSuperVersion(SuperVersionContext* sv_context, InstrumentedMutex* db_mutex); @@ -475,9 +470,11 @@ kPendingCompactionBytes, }; static std::pair - GetWriteStallConditionAndCause(int num_unflushed_memtables, int num_l0_files, - uint64_t num_compaction_needed_bytes, - const MutableCFOptions& mutable_cf_options); + GetWriteStallConditionAndCause( + int num_unflushed_memtables, int num_l0_files, + uint64_t num_compaction_needed_bytes, + const MutableCFOptions& mutable_cf_options, + const ImmutableCFOptions& immutable_cf_options); // Recalculate some small conditions, which are changed only during // compaction, adding new memtable and/or @@ -500,11 +497,29 @@ // created_dirs remembers directory created, so that we don't need to call // the same data creation operation again. Status AddDirectories( - std::map>* created_dirs); + std::map>* created_dirs); + + FSDirectory* GetDataDir(size_t path_id) const; + + // full_history_ts_low_ can only increase. 
+ void SetFullHistoryTsLow(std::string ts_low) { + assert(!ts_low.empty()); + const Comparator* ucmp = user_comparator(); + assert(ucmp); + if (full_history_ts_low_.empty() || + ucmp->CompareTimestamp(ts_low, full_history_ts_low_) > 0) { + full_history_ts_low_ = std::move(ts_low); + } + } - Directory* GetDataDir(size_t path_id) const; + const std::string& GetFullHistoryTsLow() const { + return full_history_ts_low_; + } ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); } + WriteBufferManager* write_buffer_mgr() { return write_buffer_manager_; } + + static const uint32_t kDummyColumnFamilyDataId; private: friend class ColumnFamilySet; @@ -513,9 +528,13 @@ WriteBufferManager* write_buffer_manager, const ColumnFamilyOptions& options, const ImmutableDBOptions& db_options, - const FileOptions& file_options, + const FileOptions* file_options, ColumnFamilySet* column_family_set, - BlockCacheTracer* const block_cache_tracer); + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, + const std::string& db_session_id); + + std::vector GetDbPaths() const; uint32_t id_; const std::string name_; @@ -527,16 +546,16 @@ std::atomic dropped_; // true if client dropped it const InternalKeyComparator internal_comparator_; - std::vector> - int_tbl_prop_collector_factories_; + IntTblPropCollectorFactories int_tbl_prop_collector_factories_; const ColumnFamilyOptions initial_cf_options_; - const ImmutableCFOptions ioptions_; + const ImmutableOptions ioptions_; MutableCFOptions mutable_cf_options_; const bool is_delete_range_supported_; std::unique_ptr table_cache_; + std::unique_ptr blob_file_cache_; std::unique_ptr internal_stats_; @@ -592,7 +611,11 @@ std::atomic last_memtable_id_; // Directories corresponding to cf_paths. - std::vector> data_dirs_; + std::vector> data_dirs_; + + bool db_paths_registered_; + + std::string full_history_ts_low_; }; // ColumnFamilySet has interesting thread-safety requirements @@ -605,10 +628,8 @@ // held and it needs to be executed from the write thread. SetDropped() also // guarantees that it will be called only from single-threaded LogAndApply(), // but this condition is not that important. -// * Iteration -- hold DB mutex, but you can release it in the body of -// iteration. If you release DB mutex in body, reference the column -// family before the mutex and unreference after you unlock, since the column -// family might get dropped when the DB mutex is released +// * Iteration -- hold DB mutex. If you want to release the DB mutex in the +// body of the iteration, wrap in a RefedColumnFamilySet. // * GetDefault() -- thread safe // * GetColumnFamily() -- either inside of DB mutex or from a write thread // * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily(), @@ -620,17 +641,12 @@ public: explicit iterator(ColumnFamilyData* cfd) : current_(cfd) {} + // NOTE: minimum operators for for-loop iteration iterator& operator++() { - // dropped column families might still be included in this iteration - // (we're only removing them when client drops the last reference to the - // column family). 
- // dummy is never dead, so this will never be infinite - do { - current_ = current_->next_; - } while (current_->refs_.load(std::memory_order_relaxed) == 0); + current_ = current_->next_; return *this; } - bool operator!=(const iterator& other) { + bool operator!=(const iterator& other) const { return this->current_ != other.current_; } ColumnFamilyData* operator*() { return current_; } @@ -642,9 +658,11 @@ ColumnFamilySet(const std::string& dbname, const ImmutableDBOptions* db_options, const FileOptions& file_options, Cache* table_cache, - WriteBufferManager* write_buffer_manager, - WriteController* write_controller, - BlockCacheTracer* const block_cache_tracer); + WriteBufferManager* _write_buffer_manager, + WriteController* _write_controller, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, + const std::string& db_session_id); ~ColumnFamilySet(); ColumnFamilyData* GetDefault() const; @@ -667,12 +685,12 @@ iterator begin() { return iterator(dummy_cfd_->next_); } iterator end() { return iterator(dummy_cfd_); } - // REQUIRES: DB mutex held - // Don't call while iterating over ColumnFamilySet - void FreeDeadColumnFamilies(); - Cache* get_table_cache() { return table_cache_; } + WriteBufferManager* write_buffer_manager() { return write_buffer_manager_; } + + WriteController* write_controller() { return write_controller_; } + private: friend class ColumnFamilyData; // helper function that gets called from cfd destructor @@ -690,6 +708,8 @@ std::unordered_map column_family_data_; uint32_t max_column_family_; + const FileOptions file_options_; + ColumnFamilyData* dummy_cfd_; // We don't hold the refcount here, since default column family always exists // We are also not responsible for cleaning up default_cfd_cache_. This is @@ -699,11 +719,61 @@ const std::string db_name_; const ImmutableDBOptions* const db_options_; - const FileOptions file_options_; Cache* table_cache_; WriteBufferManager* write_buffer_manager_; WriteController* write_controller_; BlockCacheTracer* const block_cache_tracer_; + std::shared_ptr io_tracer_; + std::string db_session_id_; +}; + +// A wrapper for ColumnFamilySet that supports releasing DB mutex during each +// iteration over the iterator, because the cfd is Refed and Unrefed during +// each iteration to prevent concurrent CF drop from destroying it (until +// Unref). 
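The wrapper declared next exists so callers can drop the DB mutex inside the loop body: each operator++ takes a reference on the next column family before releasing the previous one. A hedged usage sketch follows, in which GetColumnFamilySet() and DoSlowPerCfWork() are hypothetical stand-ins for whatever the real call site has in scope.

    // Illustrative caller only; RefedColumnFamilySet and ColumnFamilyData are
    // the types from this patch, the rest is assumed context.
    for (ColumnFamilyData* cfd :
         RefedColumnFamilySet(versions->GetColumnFamilySet())) {
      db_mutex->Unlock();    // safe: the iterator holds a ref on cfd
      DoSlowPerCfWork(cfd);  // hypothetical per-CF work without the mutex
      db_mutex->Lock();      // retake before the iterator advances
    }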
+class RefedColumnFamilySet { + public: + explicit RefedColumnFamilySet(ColumnFamilySet* cfs) : wrapped_(cfs) {} + + class iterator { + public: + explicit iterator(ColumnFamilySet::iterator wrapped) : wrapped_(wrapped) { + MaybeRef(*wrapped_); + } + ~iterator() { MaybeUnref(*wrapped_); } + inline void MaybeRef(ColumnFamilyData* cfd) { + if (cfd->GetID() != ColumnFamilyData::kDummyColumnFamilyDataId) { + cfd->Ref(); + } + } + inline void MaybeUnref(ColumnFamilyData* cfd) { + if (cfd->GetID() != ColumnFamilyData::kDummyColumnFamilyDataId) { + cfd->UnrefAndTryDelete(); + } + } + // NOTE: minimum operators for for-loop iteration + inline iterator& operator++() { + ColumnFamilyData* old = *wrapped_; + ++wrapped_; + // Can only unref & potentially free cfd after accessing its next_ + MaybeUnref(old); + MaybeRef(*wrapped_); + return *this; + } + inline bool operator!=(const iterator& other) const { + return this->wrapped_ != other.wrapped_; + } + inline ColumnFamilyData* operator*() { return *wrapped_; } + + private: + ColumnFamilySet::iterator wrapped_; + }; + + iterator begin() { return iterator(wrapped_->begin()); } + iterator end() { return iterator(wrapped_->end()); } + + private: + ColumnFamilySet* wrapped_; }; // We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/column_family_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/column_family_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/column_family_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -8,45 +8,37 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include -#include #include #include +#include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" -#include "memtable/hash_skiplist_rep.h" #include "options/options_parser.h" #include "port/port.h" #include "port/stack_trace.h" +#include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/utilities/object_registry.h" -#include "test_util/fault_injection_test_env.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" #include "util/string_util.h" +#include "utilities/fault_injection_env.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { static const int kValueSize = 1000; -namespace { -std::string RandomString(Random* rnd, int len) { - std::string r; - test::RandomString(rnd, len, &r); - return r; -} -} // anonymous namespace - // counts how many operations were performed -class EnvCounter : public EnvWrapper { +class EnvCounter : public SpecialEnv { public: explicit EnvCounter(Env* base) - : EnvWrapper(base), num_new_writable_file_(0) {} + : SpecialEnv(base), num_new_writable_file_(0) {} int GetNumberOfNewWritableFileCalls() { return num_new_writable_file_; } @@ -64,33 +56,30 @@ public: explicit ColumnFamilyTestBase(uint32_t format) : rnd_(139), format_(format) { Env* base_env = Env::Default(); -#ifndef ROCKSDB_LITE - const char* test_env_uri = getenv("TEST_ENV_URI"); - if (test_env_uri) { - Env* test_env = nullptr; - Status s = Env::LoadEnv(test_env_uri, &test_env, &env_guard_); - base_env = test_env; - EXPECT_OK(s); - EXPECT_NE(Env::Default(), base_env); - } -#endif // !ROCKSDB_LITE + EXPECT_OK( + test::CreateEnvFromSystem(ConfigOptions(), &base_env, &env_guard_)); EXPECT_NE(nullptr, base_env); 
env_ = new EnvCounter(base_env); + env_->skip_fsync_ = true; dbname_ = test::PerThreadDBPath("column_family_test"); db_options_.create_if_missing = true; db_options_.fail_if_options_file_error = true; db_options_.env = env_; - DestroyDB(dbname_, Options(db_options_, column_family_options_)); + EXPECT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_))); } ~ColumnFamilyTestBase() override { std::vector column_families; for (auto h : handles_) { ColumnFamilyDescriptor cfdescriptor; - h->GetDescriptor(&cfdescriptor); + Status s = h->GetDescriptor(&cfdescriptor); +#ifdef ROCKSDB_LITE + EXPECT_TRUE(s.IsNotSupported()); +#else + EXPECT_OK(s); +#endif // ROCKSDB_LITE column_families.push_back(cfdescriptor); } - Close(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); Destroy(column_families); delete env_; @@ -109,11 +98,11 @@ // preserves the implementation that was in place when all of the // magic values in this file were picked. *storage = std::string(kValueSize, ' '); - return Slice(*storage); } else { Random r(k); - return test::RandomString(&r, kValueSize, storage); + *storage = r.RandomString(kValueSize); } + return Slice(*storage); } void Build(int base, int n, int flush_every = 0) { @@ -122,7 +111,7 @@ for (int i = 0; i < n; i++) { if (flush_every != 0 && i != 0 && i % flush_every == 0) { - DBImpl* dbi = reinterpret_cast(db_); + DBImpl* dbi = static_cast_with_check(db_); dbi->TEST_FlushMemTable(); } @@ -176,7 +165,7 @@ void Close() { for (auto h : handles_) { if (h) { - db_->DestroyColumnFamilyHandle(h); + ASSERT_OK(db_->DestroyColumnFamilyHandle(h)); } } handles_.clear(); @@ -190,8 +179,8 @@ std::vector column_families; names_.clear(); for (size_t i = 0; i < cf.size(); ++i) { - column_families.push_back(ColumnFamilyDescriptor( - cf[i], options.size() == 0 ? column_family_options_ : options[i])); + column_families.emplace_back( + cf[i], options.size() == 0 ? column_family_options_ : options[i]); names_.push_back(cf[i]); } return DB::Open(db_options_, dbname_, column_families, &handles_, &db_); @@ -202,8 +191,8 @@ std::vector column_families; names_.clear(); for (size_t i = 0; i < cf.size(); ++i) { - column_families.push_back(ColumnFamilyDescriptor( - cf[i], options.size() == 0 ? column_family_options_ : options[i])); + column_families.emplace_back( + cf[i], options.size() == 0 ? column_family_options_ : options[i]); names_.push_back(cf[i]); } return DB::OpenForReadOnly(db_options_, dbname_, column_families, &handles_, @@ -227,7 +216,7 @@ Open({"default"}); } - DBImpl* dbfull() { return reinterpret_cast(db_); } + DBImpl* dbfull() { return static_cast_with_check(db_); } int GetProperty(int cf, std::string property) { std::string value; @@ -287,7 +276,11 @@ // Verify the CF options of the returned CF handle. ColumnFamilyDescriptor desc; ASSERT_OK(handles_[cfi]->GetDescriptor(&desc)); - RocksDBOptionsParser::VerifyCFOptions(desc.options, current_cf_opt); + // Need to sanitize the default column family options before comparing + // them. 
+ ASSERT_OK(RocksDBOptionsParser::VerifyCFOptions( + ConfigOptions(), desc.options, + SanitizeOptions(dbfull()->immutable_db_options(), current_cf_opt))); #endif // !ROCKSDB_LITE cfi++; } @@ -313,7 +306,7 @@ void DropColumnFamilies(const std::vector& cfs) { for (auto cf : cfs) { ASSERT_OK(db_->DropColumnFamily(handles_[cf])); - db_->DestroyColumnFamilyHandle(handles_[cf]); + ASSERT_OK(db_->DestroyColumnFamilyHandle(handles_[cf])); handles_[cf] = nullptr; names_[cf] = ""; } @@ -327,14 +320,14 @@ // 10 bytes for key, rest is value if (!save) { ASSERT_OK(Put(cf, test::RandomKey(&rnd_, 11), - RandomString(&rnd_, key_value_size - 10))); + rnd_.RandomString(key_value_size - 10))); } else { std::string key = test::RandomKey(&rnd_, 11); keys_[cf].insert(key); - ASSERT_OK(Put(cf, key, RandomString(&rnd_, key_value_size - 10))); + ASSERT_OK(Put(cf, key, rnd_.RandomString(key_value_size - 10))); } } - db_->FlushWAL(false); + ASSERT_OK(db_->FlushWAL(/*sync=*/false)); } #ifndef ROCKSDB_LITE // TEST functions in DB are not supported in lite @@ -561,14 +554,14 @@ INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest, - testing::Values(test::kLatestFormatVersion)); + testing::Values(kLatestFormatVersion)); TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) { for (int iter = 0; iter < 3; ++iter) { Open(); CreateColumnFamilies({"one", "two", "three"}); for (size_t i = 0; i < handles_.size(); ++i) { - auto cfh = reinterpret_cast(handles_[i]); + auto cfh = static_cast_with_check(handles_[i]); ASSERT_EQ(i, cfh->GetID()); } if (iter == 1) { @@ -584,7 +577,7 @@ CreateColumnFamilies({"three2"}); // ID 3 that was used for dropped column family "three" should not be // reused - auto cfh3 = reinterpret_cast(handles_[3]); + auto cfh3 = static_cast_with_check(handles_[3]); ASSERT_EQ(4U, cfh3->GetID()); Close(); Destroy(); @@ -652,11 +645,11 @@ // after flushing file B is deleted. At the same time, the min log number of // default CF is not written to manifest. Log file A still remains. // Flushed to SST file Y. - Flush(1); - Flush(0); + ASSERT_OK(Flush(1)); + ASSERT_OK(Flush(0)); ASSERT_OK(Put(1, "bar", "v3")); // seqID 4 ASSERT_OK(Put(1, "foo", "v4")); // seqID 5 - db_->FlushWAL(false); + ASSERT_OK(db_->FlushWAL(/*sync=*/false)); // Preserve file system state up to here to simulate a crash condition. fault_env->SetFilesystemActive(false); @@ -707,19 +700,19 @@ // and is set to current. Both CFs' min log number is set to file C so after // flushing file B is deleted. Log file A still remains. // Flushed to SST file Y. - Flush(1); + ASSERT_OK(Flush(1)); ASSERT_OK(Put(0, "bar", "v2")); // seqID 4 ASSERT_OK(Put(2, "bar", "v2")); // seqID 5 ASSERT_OK(Put(1, "bar", "v3")); // seqID 6 // Flushing all column families. This forces all CFs' min log to current. This // is written to the manifest file. Log file C is cleared. - Flush(0); - Flush(1); - Flush(2); + ASSERT_OK(Flush(0)); + ASSERT_OK(Flush(1)); + ASSERT_OK(Flush(2)); // Write to log file D ASSERT_OK(Put(1, "bar", "v4")); // seqID 7 ASSERT_OK(Put(1, "bar", "v5")); // seqID 8 - db_->FlushWAL(false); + ASSERT_OK(db_->FlushWAL(/*sync=*/false)); // Preserve file system state up to here to simulate a crash condition. 
fault_env->SetFilesystemActive(false); std::vector names; @@ -753,8 +746,8 @@ std::make_tuple(test::kDefaultFormatVersion, false))); INSTANTIATE_TEST_CASE_P( FormatLatest, FlushEmptyCFTestWithParam, - testing::Values(std::make_tuple(test::kLatestFormatVersion, true), - std::make_tuple(test::kLatestFormatVersion, false))); + testing::Values(std::make_tuple(kLatestFormatVersion, true), + std::make_tuple(kLatestFormatVersion, false))); TEST_P(ColumnFamilyTest, AddDrop) { Open(); @@ -821,7 +814,7 @@ } TEST_P(ColumnFamilyTest, DropTest) { - // first iteration - dont reopen DB before dropping + // first iteration - don't reopen DB before dropping // second iteration - reopen DB before dropping for (int iter = 0; iter < 2; ++iter) { Open({"default"}); @@ -848,13 +841,15 @@ Open(); CreateColumnFamiliesAndReopen({"one", "two"}); WriteBatch batch; - batch.Put(handles_[0], Slice("existing"), Slice("column-family")); - batch.Put(handles_[1], Slice("non-existing"), Slice("column-family")); + ASSERT_OK(batch.Put(handles_[0], Slice("existing"), Slice("column-family"))); + ASSERT_OK( + batch.Put(handles_[1], Slice("non-existing"), Slice("column-family"))); ASSERT_OK(db_->Write(WriteOptions(), &batch)); DropColumnFamilies({1}); WriteOptions woptions_ignore_missing_cf; woptions_ignore_missing_cf.ignore_missing_column_families = true; - batch.Put(handles_[0], Slice("still here"), Slice("column-family")); + ASSERT_OK( + batch.Put(handles_[0], Slice("still here"), Slice("column-family"))); ASSERT_OK(db_->Write(woptions_ignore_missing_cf, &batch)); ASSERT_EQ("column-family", Get(0, "still here")); Status s = db_->Write(WriteOptions(), &batch); @@ -893,11 +888,9 @@ ASSERT_OK(env_->CreateDirIfMissing(dbname_)); ASSERT_OK(env_->CreateDirIfMissing(backup_logs)); std::vector old_files; - env_->GetChildren(backup_logs, &old_files); + ASSERT_OK(env_->GetChildren(backup_logs, &old_files)); for (auto& file : old_files) { - if (file != "." && file != "..") { - env_->DeleteFile(backup_logs + "/" + file); - } + ASSERT_OK(env_->DeleteFile(backup_logs + "/" + file)); } column_family_options_.merge_operator = @@ -924,11 +917,9 @@ // copy the logs to backup std::vector logs; - env_->GetChildren(db_options_.wal_dir, &logs); + ASSERT_OK(env_->GetChildren(db_options_.wal_dir, &logs)); for (auto& log : logs) { - if (log != ".." && log != ".") { - CopyFile(db_options_.wal_dir + "/" + log, backup_logs + "/" + log); - } + CopyFile(db_options_.wal_dir + "/" + log, backup_logs + "/" + log); } // recover the DB @@ -953,9 +944,7 @@ if (iter == 0) { // copy the logs from backup back to wal dir for (auto& log : logs) { - if (log != ".." 
&& log != ".") { - CopyFile(backup_logs + "/" + log, db_options_.wal_dir + "/" + log); - } + CopyFile(backup_logs + "/" + log, db_options_.wal_dir + "/" + log); } } } @@ -982,13 +971,14 @@ for (int i = 0; i < 3; ++i) { uint64_t max_total_in_memory_state = MaxTotalInMemoryState(); - Flush(i); + ASSERT_OK(Flush(i)); AssertMaxTotalInMemoryState(max_total_in_memory_state); } ASSERT_OK(Put(1, "foofoo", "bar")); ASSERT_OK(Put(0, "foofoo", "bar")); for (auto* it : iterators) { + ASSERT_OK(it->status()); delete it; } } @@ -1086,10 +1076,10 @@ CreateColumnFamilies({"one"}); WriteBatch batch; - batch.Put(handles_[0], Slice("foo"), Slice("bar")); - batch.Put(handles_[1], Slice("foo"), Slice("bar")); + ASSERT_OK(batch.Put(handles_[0], Slice("foo"), Slice("bar"))); + ASSERT_OK(batch.Put(handles_[1], Slice("foo"), Slice("bar"))); ASSERT_OK(db_->Write(WriteOptions(), &batch)); - Flush(0); + ASSERT_OK(Flush(0)); fault_env->SetFilesystemActive(false); std::vector names; @@ -1099,7 +1089,7 @@ } } Close(); - fault_env->DropUnsyncedFileData(); + ASSERT_OK(fault_env->DropUnsyncedFileData()); fault_env->ResetState(); Open(names, {}); @@ -2073,6 +2063,7 @@ if (iter->Valid()) { result = iter->key().ToString() + "->" + iter->value().ToString(); } else { + EXPECT_OK(iter->status()); result = "(invalid)"; } return result; @@ -2231,7 +2222,7 @@ // files for column family [one], because it's empty AssertCountLiveFiles(4); - Flush(0); + ASSERT_OK(Flush(0)); ASSERT_EQ(0, dbfull()->TEST_total_log_size()); Close(); } @@ -2287,6 +2278,8 @@ // not a multiple of 4k, round up 4k expected_arena_block_size += 4 * 1024; } + expected_arena_block_size = + std::min(size_t{1024 * 1024}, expected_arena_block_size); ASSERT_EQ(expected_arena_block_size, result.arena_block_size); } } @@ -2327,7 +2320,7 @@ ASSERT_OK(db_->DropColumnFamily(handles_[2])); } else { // delete CF two - db_->DestroyColumnFamilyHandle(handles_[2]); + ASSERT_OK(db_->DestroyColumnFamilyHandle(handles_[2])); handles_[2] = nullptr; } // Make sure iterator created can still be used. @@ -2383,7 +2376,6 @@ // 1MB should create ~10 files for each CF int kKeysNum = 10000; PutRandomData(1, kKeysNum, 100); - { std::unique_ptr iterator( db_->NewIterator(ReadOptions(), handles_[1])); @@ -2430,6 +2422,9 @@ env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); + // Make sure the task is sleeping. Otherwise, it might start to execute + // after sleeping_task.WaitUntilDone() and cause TSAN warning. + sleeping_task.WaitUntilSleeping(); // 1MB should create ~10 files for each CF int kKeysNum = 10000; @@ -2444,6 +2439,9 @@ // now we sleep again. this is just so we're certain that flush job finished env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); + // Make sure the task is sleeping. Otherwise, it might start to execute + // after sleeping_task.WaitUntilDone() and cause TSAN warning. 
+ sleeping_task.WaitUntilSleeping(); sleeping_task.WakeUp(); sleeping_task.WaitUntilDone(); @@ -2977,7 +2975,8 @@ SpecialEnv env(Env::Default()); db_options_.env = &env; db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2)); + column_family_options_.memtable_factory.reset( + test::NewSpecialSkipListFactory(2)); Open(); CreateColumnFamilies({"one"}); ASSERT_OK(Put(1, "fodor", "mirko")); @@ -2993,6 +2992,9 @@ test::SleepingBackgroundTask sleeping_task; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); + // Make sure the task is sleeping. Otherwise, it might start to execute + // after sleeping_task.WaitUntilDone() and cause TSAN warning. + sleeping_task.WaitUntilSleeping(); WriteOptions wo; wo.sync = true; @@ -3019,14 +3021,16 @@ SpecialEnv env(Env::Default()); db_options_.env = &env; db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2)); + column_family_options_.memtable_factory.reset( + test::NewSpecialSkipListFactory(2)); Open(); CreateColumnFamilies({"one"}); ASSERT_OK(Put(1, "fodor", "mirko")); // Create an iterator holding the current super version. Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]); + ASSERT_OK(it->status()); // A flush will make `it` hold the last reference of its super version. - Flush(1); + ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(0, "fodor", "mirko")); @@ -3038,6 +3042,9 @@ test::SleepingBackgroundTask sleeping_task; env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task, Env::Priority::HIGH); + // Make sure the task is sleeping. Otherwise, it might start to execute + // after sleeping_task.WaitUntilDone() and cause TSAN warning. + sleeping_task.WaitUntilSleeping(); WriteOptions wo; wo.sync = true; @@ -3066,7 +3073,8 @@ env.SetBackgroundThreads(2, Env::HIGH); db_options_.env = &env; db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2)); + column_family_options_.memtable_factory.reset( + test::NewSpecialSkipListFactory(2)); Open(); CreateColumnFamilies({"one"}); ASSERT_OK(Put(1, "fodor", "mirko")); @@ -3074,8 +3082,9 @@ ReadOptions ro; ro.background_purge_on_iterator_cleanup = true; Iterator* it = db_->NewIterator(ro, handles_[1]); + ASSERT_OK(it->status()); // A flush will make `it` hold the last reference of its super version. - Flush(1); + ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(0, "fodor", "mirko")); @@ -3123,13 +3132,14 @@ env.SetBackgroundThreads(2, Env::HIGH); db_options_.env = &env; db_options_.max_background_flushes = 1; - column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(3)); + column_family_options_.memtable_factory.reset( + test::NewSpecialSkipListFactory(3)); column_family_options_.level0_file_num_compaction_trigger = 2; Open(); CreateColumnFamilies({"one"}); ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(1, "fodar2", "mirko")); - Flush(1); + ASSERT_OK(Flush(1)); // Create an iterator holding the current super version, as well as // the SST file just flushed. 
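All of the WaitUntilSleeping() additions in these test hunks enforce the same ordering, sketched here with the test-utility names the diff itself uses (a template of the ordering, not a new API):

    test::SleepingBackgroundTask sleeping_task;
    env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
                   Env::Priority::HIGH);
    // Without this wait, the scheduled task might only start running after
    // WaitUntilDone() below has returned, racing with teardown (TSAN).
    sleeping_task.WaitUntilSleeping();
    // ... test body that relies on the background thread being occupied ...
    sleeping_task.WakeUp();
    sleeping_task.WaitUntilDone();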
@@ -3141,7 +3151,7 @@ ASSERT_OK(Put(1, "fodor", "mirko")); ASSERT_OK(Put(1, "fodar2", "mirko")); - Flush(1); + ASSERT_OK(Flush(1)); WaitForCompaction(); @@ -3168,6 +3178,8 @@ // Deleting the iterator will clear its super version, triggering // closing all files it->Seek(""); + ASSERT_OK(it->status()); + ASSERT_EQ(2, env.num_open_wal_file_.load()); ASSERT_EQ(0, env.delete_count_.load()); @@ -3198,8 +3210,8 @@ Open(); CreateColumnFamiliesAndReopen({"one", "two"}); - Put(0, "", ""); - Put(1, "foo", "bar"); + ASSERT_OK(Put(0, "", "")); + ASSERT_OK(Put(1, "foo", "bar")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"DBImpl::SyncWAL:BeforeMarkLogsSynced:1", @@ -3209,12 +3221,12 @@ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - ROCKSDB_NAMESPACE::port::Thread thread([&] { db_->SyncWAL(); }); + ROCKSDB_NAMESPACE::port::Thread thread([&] { ASSERT_OK(db_->SyncWAL()); }); TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:1"); - Flush(1); - Put(1, "foo", "bar"); - Flush(1); + ASSERT_OK(Flush(1)); + ASSERT_OK(Put(1, "foo", "bar")); + ASSERT_OK(Flush(1)); TEST_SYNC_POINT("ColumnFamilyTest::LogSyncConflictFlush:2"); @@ -3236,7 +3248,7 @@ Build(0, 100); // Flush the 0th column family to force a roll of the wal log - Flush(0); + ASSERT_OK(Flush(0)); // Add some more entries Build(100, 100); @@ -3251,7 +3263,7 @@ FileType type; if (!(ParseFileName(filenames[i], &number, &type))) continue; - if (type != kLogFile) continue; + if (type != kWalFile) continue; logfs.push_back(filenames[i]); } @@ -3296,7 +3308,7 @@ Close(); // cleanup - env_->DeleteDir(backup_logs); + ASSERT_OK(env_->DeleteDir(backup_logs)); } TEST_P(ColumnFamilyTest, DefaultCfPathsTest) { @@ -3312,14 +3324,14 @@ // Fill Column family 1. PutRandomData(1, 100, 100); - Flush(1); + ASSERT_OK(Flush(1)); ASSERT_EQ(1, GetSstFileCount(cf_opt1.cf_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Fill column family 2 PutRandomData(2, 100, 100); - Flush(2); + ASSERT_OK(Flush(2)); // SST from Column family 2 should be generated in // db_paths which is dbname_ in this case. @@ -3338,29 +3350,31 @@ Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); PutRandomData(1, 100, 100, true /* save */); - Flush(1); + ASSERT_OK(Flush(1)); // Check that files are generated in appropriate paths. ASSERT_EQ(1, GetSstFileCount(cf_opt1.cf_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); PutRandomData(2, 100, 100, true /* save */); - Flush(2); + ASSERT_OK(Flush(2)); ASSERT_EQ(1, GetSstFileCount(cf_opt2.cf_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_)); // Re-open and verify the keys. 
Reopen({ColumnFamilyOptions(), cf_opt1, cf_opt2}); - DBImpl* dbi = reinterpret_cast(db_); + DBImpl* dbi = static_cast_with_check(db_); for (int cf = 1; cf != 3; ++cf) { ReadOptions read_options; read_options.readahead_size = 0; auto it = dbi->NewIterator(read_options, handles_[cf]); for (it->SeekToFirst(); it->Valid(); it->Next()) { + ASSERT_OK(it->status()); Slice key(it->key()); ASSERT_NE(keys_[cf].end(), keys_[cf].find(key.ToString())); } + ASSERT_OK(it->status()); delete it; for (const auto& key : keys_[cf]) { @@ -3369,15 +3383,55 @@ } } -} // namespace ROCKSDB_NAMESPACE +TEST(ColumnFamilyTest, ValidateBlobGCCutoff) { + DBOptions db_options; -#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS -extern "C" { -void RegisterCustomObjects(int argc, char** argv); + ColumnFamilyOptions cf_options; + cf_options.enable_blob_garbage_collection = true; + + cf_options.blob_garbage_collection_age_cutoff = -0.5; + ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) + .IsInvalidArgument()); + + cf_options.blob_garbage_collection_age_cutoff = 0.0; + ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); + + cf_options.blob_garbage_collection_age_cutoff = 0.5; + ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); + + cf_options.blob_garbage_collection_age_cutoff = 1.0; + ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); + + cf_options.blob_garbage_collection_age_cutoff = 1.5; + ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) + .IsInvalidArgument()); } -#else -void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} -#endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS + +TEST(ColumnFamilyTest, ValidateBlobGCForceThreshold) { + DBOptions db_options; + + ColumnFamilyOptions cf_options; + cf_options.enable_blob_garbage_collection = true; + + cf_options.blob_garbage_collection_force_threshold = -0.5; + ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) + .IsInvalidArgument()); + + cf_options.blob_garbage_collection_force_threshold = 0.0; + ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); + + cf_options.blob_garbage_collection_force_threshold = 0.5; + ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); + + cf_options.blob_garbage_collection_force_threshold = 1.0; + ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); + + cf_options.blob_garbage_collection_force_threshold = 1.5; + ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options) + .IsInvalidArgument()); +} + +} // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compact_files_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compact_files_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compact_files_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compact_files_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -16,6 +16,7 @@ #include "rocksdb/env.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" +#include "util/cast_util.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { @@ -90,9 +91,9 @@ // create couple files // Background compaction starts and waits in BackgroundCallCompaction:0 for (int i = 0; i < kLevel0Trigger * 4; ++i) { - db->Put(WriteOptions(), ToString(i), ""); - db->Put(WriteOptions(), ToString(100 - i), ""); - 
db->Flush(FlushOptions()); + ASSERT_OK(db->Put(WriteOptions(), ToString(i), "")); + ASSERT_OK(db->Put(WriteOptions(), ToString(100 - i), "")); + ASSERT_OK(db->Flush(FlushOptions())); } ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta; @@ -117,6 +118,78 @@ delete db; } +TEST_F(CompactFilesTest, MultipleLevel) { + Options options; + options.create_if_missing = true; + options.level_compaction_dynamic_level_bytes = true; + options.num_levels = 6; + // Add listener + FlushedFileCollector* collector = new FlushedFileCollector(); + options.listeners.emplace_back(collector); + + DB* db = nullptr; + DestroyDB(db_name_, options); + Status s = DB::Open(options, db_name_, &db); + ASSERT_OK(s); + ASSERT_NE(db, nullptr); + + // create couple files in L0, L3, L4 and L5 + for (int i = 5; i > 2; --i) { + collector->ClearFlushedFiles(); + ASSERT_OK(db->Put(WriteOptions(), ToString(i), "")); + ASSERT_OK(db->Flush(FlushOptions())); + auto l0_files = collector->GetFlushedFiles(); + ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, i)); + + std::string prop; + ASSERT_TRUE( + db->GetProperty("rocksdb.num-files-at-level" + ToString(i), &prop)); + ASSERT_EQ("1", prop); + } + ASSERT_OK(db->Put(WriteOptions(), ToString(0), "")); + ASSERT_OK(db->Flush(FlushOptions())); + + ColumnFamilyMetaData meta; + db->GetColumnFamilyMetaData(&meta); + // Compact files except the file in L3 + std::vector files; + for (int i = 0; i < 6; ++i) { + if (i == 3) continue; + for (auto& file : meta.levels[i].files) { + files.push_back(file.db_path + "/" + file.name); + } + } + + SyncPoint::GetInstance()->LoadDependency({ + {"CompactionJob::Run():Start", "CompactFilesTest.MultipleLevel:0"}, + {"CompactFilesTest.MultipleLevel:1", "CompactFilesImpl:3"}, + }); + SyncPoint::GetInstance()->EnableProcessing(); + + std::thread thread([&] { + TEST_SYNC_POINT("CompactFilesTest.MultipleLevel:0"); + ASSERT_OK(db->Put(WriteOptions(), "bar", "v2")); + ASSERT_OK(db->Put(WriteOptions(), "foo", "v2")); + ASSERT_OK(db->Flush(FlushOptions())); + TEST_SYNC_POINT("CompactFilesTest.MultipleLevel:1"); + }); + + // Compaction cannot move up the data to higher level + // here we have input file from level 5, so the output level has to be >= 5 + for (int invalid_output_level = 0; invalid_output_level < 5; + invalid_output_level++) { + s = db->CompactFiles(CompactionOptions(), files, invalid_output_level); + std::cout << s.ToString() << std::endl; + ASSERT_TRUE(s.IsInvalidArgument()); + } + + ASSERT_OK(db->CompactFiles(CompactionOptions(), files, 5)); + SyncPoint::GetInstance()->DisableProcessing(); + thread.join(); + + delete db; +} + TEST_F(CompactFilesTest, ObsoleteFiles) { Options options; // to trigger compaction more easily @@ -137,18 +210,18 @@ DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); - assert(s.ok()); - assert(db); + ASSERT_OK(s); + ASSERT_NE(db, nullptr); // create couple files for (int i = 1000; i < 2000; ++i) { - db->Put(WriteOptions(), ToString(i), - std::string(kWriteBufferSize / 10, 'a' + (i % 26))); + ASSERT_OK(db->Put(WriteOptions(), ToString(i), + std::string(kWriteBufferSize / 10, 'a' + (i % 26)))); } auto l0_files = collector->GetFlushedFiles(); ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, 1)); - reinterpret_cast(db)->TEST_WaitForCompact(); + ASSERT_OK(static_cast_with_check(db)->TEST_WaitForCompact()); // verify all compaction input files are deleted for (auto fname : l0_files) { @@ -181,15 +254,17 @@ // create couple files for (int i = 0; i < 500; ++i) { - 
db->Put(WriteOptions(), ToString(i), std::string(1000, 'a' + (i % 26))); + ASSERT_OK(db->Put(WriteOptions(), ToString(i), + std::string(1000, 'a' + (i % 26)))); } - reinterpret_cast(db)->TEST_WaitForFlushMemTable(); + ASSERT_OK(static_cast_with_check(db)->TEST_WaitForFlushMemTable()); auto l0_files_1 = collector->GetFlushedFiles(); collector->ClearFlushedFiles(); for (int i = 0; i < 500; ++i) { - db->Put(WriteOptions(), ToString(i), std::string(1000, 'a' + (i % 26))); + ASSERT_OK(db->Put(WriteOptions(), ToString(i), + std::string(1000, 'a' + (i % 26)))); } - reinterpret_cast(db)->TEST_WaitForFlushMemTable(); + ASSERT_OK(static_cast_with_check(db)->TEST_WaitForFlushMemTable()); auto l0_files_2 = collector->GetFlushedFiles(); ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_1, 0)); ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files_2, 0)); @@ -212,13 +287,13 @@ DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); - assert(s.ok()); + ASSERT_OK(s); assert(db); // Create 5 files. for (int i = 0; i < 5; ++i) { - db->Put(WriteOptions(), "key" + ToString(i), "value"); - db->Flush(FlushOptions()); + ASSERT_OK(db->Put(WriteOptions(), "key" + ToString(i), "value")); + ASSERT_OK(db->Flush(FlushOptions())); } auto l0_files = collector->GetFlushedFiles(); @@ -236,8 +311,8 @@ // In the meantime flush another file. TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:0"); - db->Put(WriteOptions(), "key5", "value"); - db->Flush(FlushOptions()); + ASSERT_OK(db->Put(WriteOptions(), "key5", "value")); + ASSERT_OK(db->Flush(FlushOptions())); TEST_SYNC_POINT("CompactFilesTest.CapturingPendingFiles:1"); compaction_thread.join(); @@ -248,7 +323,7 @@ // Make sure we can reopen the DB. s = DB::Open(options, db_name_, &db); - ASSERT_TRUE(s.ok()); + ASSERT_OK(s); assert(db); delete db; } @@ -292,8 +367,8 @@ cf->SetDB(db); // Write one L0 file - db->Put(WriteOptions(), "K1", "V1"); - db->Flush(FlushOptions()); + ASSERT_OK(db->Put(WriteOptions(), "K1", "V1")); + ASSERT_OK(db->Flush(FlushOptions())); // Compact all L0 files using CompactFiles ROCKSDB_NAMESPACE::ColumnFamilyMetaData meta; @@ -336,8 +411,8 @@ DB* db = nullptr; ASSERT_OK(DB::Open(options, db_name_, &db)); - db->Put(WriteOptions(), "key", "val"); - db->Flush(FlushOptions()); + ASSERT_OK(db->Put(WriteOptions(), "key", "val")); + ASSERT_OK(db->Flush(FlushOptions())); auto l0_files = collector->GetFlushedFiles(); ASSERT_EQ(1, l0_files.size()); @@ -376,14 +451,15 @@ DB* db = nullptr; DestroyDB(db_name_, options); Status s = DB::Open(options, db_name_, &db); - assert(s.ok()); + ASSERT_OK(s); assert(db); // create couple files for (int i = 0; i < 500; ++i) { - db->Put(WriteOptions(), ToString(i), std::string(1000, 'a' + (i % 26))); + ASSERT_OK(db->Put(WriteOptions(), ToString(i), + std::string(1000, 'a' + (i % 26)))); } - reinterpret_cast(db)->TEST_WaitForFlushMemTable(); + ASSERT_OK(static_cast_with_check(db)->TEST_WaitForFlushMemTable()); auto l0_files_1 = collector->GetFlushedFiles(); CompactionOptions co; co.compression = CompressionType::kLZ4Compression; diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compacted_db_impl.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compacted_db_impl.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compacted_db_impl.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compacted_db_impl.cc 1970-01-01 00:00:00.000000000 +0000 @@ -1,160 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
-// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). - -#ifndef ROCKSDB_LITE -#include "db/compacted_db_impl.h" -#include "db/db_impl/db_impl.h" -#include "db/version_set.h" -#include "table/get_context.h" - -namespace ROCKSDB_NAMESPACE { - -extern void MarkKeyMayExist(void* arg); -extern bool SaveValue(void* arg, const ParsedInternalKey& parsed_key, - const Slice& v, bool hit_and_return); - -CompactedDBImpl::CompactedDBImpl( - const DBOptions& options, const std::string& dbname) - : DBImpl(options, dbname), cfd_(nullptr), version_(nullptr), - user_comparator_(nullptr) { -} - -CompactedDBImpl::~CompactedDBImpl() { -} - -size_t CompactedDBImpl::FindFile(const Slice& key) { - size_t right = files_.num_files - 1; - auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool { - return user_comparator_->Compare(ExtractUserKey(f.largest_key), k) < 0; - }; - return static_cast(std::lower_bound(files_.files, - files_.files + right, key, cmp) - files_.files); -} - -Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*, - const Slice& key, PinnableSlice* value) { - GetContext get_context(user_comparator_, nullptr, nullptr, nullptr, - GetContext::kNotFound, key, value, nullptr, nullptr, - true, nullptr, nullptr); - LookupKey lkey(key, kMaxSequenceNumber); - files_.files[FindFile(key)].fd.table_reader->Get(options, lkey.internal_key(), - &get_context, nullptr); - if (get_context.State() == GetContext::kFound) { - return Status::OK(); - } - return Status::NotFound(); -} - -std::vector CompactedDBImpl::MultiGet(const ReadOptions& options, - const std::vector&, - const std::vector& keys, std::vector* values) { - autovector reader_list; - for (const auto& key : keys) { - const FdWithKeyRange& f = files_.files[FindFile(key)]; - if (user_comparator_->Compare(key, ExtractUserKey(f.smallest_key)) < 0) { - reader_list.push_back(nullptr); - } else { - LookupKey lkey(key, kMaxSequenceNumber); - f.fd.table_reader->Prepare(lkey.internal_key()); - reader_list.push_back(f.fd.table_reader); - } - } - std::vector statuses(keys.size(), Status::NotFound()); - values->resize(keys.size()); - int idx = 0; - for (auto* r : reader_list) { - if (r != nullptr) { - PinnableSlice pinnable_val; - std::string& value = (*values)[idx]; - GetContext get_context(user_comparator_, nullptr, nullptr, nullptr, - GetContext::kNotFound, keys[idx], &pinnable_val, - nullptr, nullptr, true, nullptr, nullptr); - LookupKey lkey(keys[idx], kMaxSequenceNumber); - r->Get(options, lkey.internal_key(), &get_context, nullptr); - value.assign(pinnable_val.data(), pinnable_val.size()); - if (get_context.State() == GetContext::kFound) { - statuses[idx] = Status::OK(); - } - } - ++idx; - } - return statuses; -} - -Status CompactedDBImpl::Init(const Options& options) { - SuperVersionContext sv_context(/* create_superversion */ true); - mutex_.Lock(); - ColumnFamilyDescriptor cf(kDefaultColumnFamilyName, - ColumnFamilyOptions(options)); - Status s = Recover({cf}, true /* read only */, false, true); - if (s.ok()) { - cfd_ = reinterpret_cast( - DefaultColumnFamily())->cfd(); - cfd_->InstallSuperVersion(&sv_context, &mutex_); - } - mutex_.Unlock(); - sv_context.Clean(); - if (!s.ok()) { - return s; - } - NewThreadStatusCfInfo(cfd_); - version_ = cfd_->GetSuperVersion()->current; - user_comparator_ = cfd_->user_comparator(); - auto* vstorage = version_->storage_info(); - if 
(vstorage->num_non_empty_levels() == 0) { - return Status::NotSupported("no file exists"); - } - const LevelFilesBrief& l0 = vstorage->LevelFilesBrief(0); - // L0 should not have files - if (l0.num_files > 1) { - return Status::NotSupported("L0 contain more than 1 file"); - } - if (l0.num_files == 1) { - if (vstorage->num_non_empty_levels() > 1) { - return Status::NotSupported("Both L0 and other level contain files"); - } - files_ = l0; - return Status::OK(); - } - - for (int i = 1; i < vstorage->num_non_empty_levels() - 1; ++i) { - if (vstorage->LevelFilesBrief(i).num_files > 0) { - return Status::NotSupported("Other levels also contain files"); - } - } - - int level = vstorage->num_non_empty_levels() - 1; - if (vstorage->LevelFilesBrief(level).num_files > 0) { - files_ = vstorage->LevelFilesBrief(level); - return Status::OK(); - } - return Status::NotSupported("no file exists"); -} - -Status CompactedDBImpl::Open(const Options& options, - const std::string& dbname, DB** dbptr) { - *dbptr = nullptr; - - if (options.max_open_files != -1) { - return Status::InvalidArgument("require max_open_files = -1"); - } - if (options.merge_operator.get() != nullptr) { - return Status::InvalidArgument("merge operator is not supported"); - } - DBOptions db_options(options); - std::unique_ptr db(new CompactedDBImpl(db_options, dbname)); - Status s = db->Init(options); - if (s.ok()) { - db->StartTimedTasks(); - ROCKS_LOG_INFO(db->immutable_db_options_.info_log, - "Opened the db as fully compacted mode"); - LogFlush(db->immutable_db_options_.info_log); - *dbptr = db.release(); - } - return s; -} - -} // namespace ROCKSDB_NAMESPACE -#endif // ROCKSDB_LITE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compacted_db_impl.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compacted_db_impl.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compacted_db_impl.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compacted_db_impl.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,113 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). 
- -#pragma once -#ifndef ROCKSDB_LITE -#include -#include -#include "db/db_impl/db_impl.h" - -namespace ROCKSDB_NAMESPACE { - -class CompactedDBImpl : public DBImpl { - public: - CompactedDBImpl(const DBOptions& options, const std::string& dbname); - // No copying allowed - CompactedDBImpl(const CompactedDBImpl&) = delete; - void operator=(const CompactedDBImpl&) = delete; - - virtual ~CompactedDBImpl(); - - static Status Open(const Options& options, const std::string& dbname, - DB** dbptr); - - // Implementations of the DB interface - using DB::Get; - virtual Status Get(const ReadOptions& options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value) override; - using DB::MultiGet; - virtual std::vector MultiGet( - const ReadOptions& options, - const std::vector&, - const std::vector& keys, std::vector* values) - override; - - using DBImpl::Put; - virtual Status Put(const WriteOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, - const Slice& /*key*/, const Slice& /*value*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - using DBImpl::Merge; - virtual Status Merge(const WriteOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, - const Slice& /*key*/, const Slice& /*value*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - using DBImpl::Delete; - virtual Status Delete(const WriteOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, - const Slice& /*key*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - virtual Status Write(const WriteOptions& /*options*/, - WriteBatch* /*updates*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - using DBImpl::CompactRange; - virtual Status CompactRange(const CompactRangeOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/, - const Slice* /*begin*/, - const Slice* /*end*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - - virtual Status DisableFileDeletions() override { - return Status::NotSupported("Not supported in compacted db mode."); - } - virtual Status EnableFileDeletions(bool /*force*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - virtual Status GetLiveFiles(std::vector& ret, - uint64_t* manifest_file_size, - bool /*flush_memtable*/) override { - return DBImpl::GetLiveFiles(ret, manifest_file_size, - false /* flush_memtable */); - } - using DBImpl::Flush; - virtual Status Flush(const FlushOptions& /*options*/, - ColumnFamilyHandle* /*column_family*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - using DB::IngestExternalFile; - virtual Status IngestExternalFile( - ColumnFamilyHandle* /*column_family*/, - const std::vector& /*external_files*/, - const IngestExternalFileOptions& /*ingestion_options*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - using DB::CreateColumnFamilyWithImport; - virtual Status CreateColumnFamilyWithImport( - const ColumnFamilyOptions& /*options*/, - const std::string& /*column_family_name*/, - const ImportColumnFamilyOptions& /*import_options*/, - const ExportImportFilesMetaData& /*metadata*/, - ColumnFamilyHandle** /*handle*/) override { - return Status::NotSupported("Not supported in compacted db mode."); - } - - private: - friend class DB; - inline size_t FindFile(const Slice& key); - Status Init(const Options& options); - - ColumnFamilyData* cfd_; 
- Version* version_; - const Comparator* user_comparator_; - LevelFilesBrief files_; -}; -} // namespace ROCKSDB_NAMESPACE -#endif // ROCKSDB_LITE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/clipping_iterator.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/clipping_iterator.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/clipping_iterator.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/clipping_iterator.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,275 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "table/internal_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +// An internal iterator that wraps another one and ensures that any keys +// returned are strictly within a range [start, end). If the underlying +// iterator has already performed the bounds checking, it relies on that result; +// otherwise, it performs the necessary key comparisons itself. Both bounds +// are optional. +class ClippingIterator : public InternalIterator { + public: + ClippingIterator(InternalIterator* iter, const Slice* start, const Slice* end, + const Comparator* cmp) + : iter_(iter), start_(start), end_(end), cmp_(cmp), valid_(false) { + assert(iter_); + assert(cmp_); + assert(!start_ || !end_ || cmp_->Compare(*start_, *end_) <= 0); + + UpdateAndEnforceBounds(); + } + + bool Valid() const override { return valid_; } + + void SeekToFirst() override { + if (start_) { + iter_->Seek(*start_); + } else { + iter_->SeekToFirst(); + } + + UpdateAndEnforceUpperBound(); + } + + void SeekToLast() override { + if (end_) { + iter_->SeekForPrev(*end_); + + // Upper bound is exclusive, so we need a key which is strictly smaller + if (iter_->Valid() && cmp_->Compare(iter_->key(), *end_) == 0) { + iter_->Prev(); + } + } else { + iter_->SeekToLast(); + } + + UpdateAndEnforceLowerBound(); + } + + void Seek(const Slice& target) override { + if (start_ && cmp_->Compare(target, *start_) < 0) { + iter_->Seek(*start_); + UpdateAndEnforceUpperBound(); + return; + } + + if (end_ && cmp_->Compare(target, *end_) >= 0) { + valid_ = false; + return; + } + + iter_->Seek(target); + UpdateAndEnforceUpperBound(); + } + + void SeekForPrev(const Slice& target) override { + if (start_ && cmp_->Compare(target, *start_) < 0) { + valid_ = false; + return; + } + + if (end_ && cmp_->Compare(target, *end_) >= 0) { + iter_->SeekForPrev(*end_); + + // Upper bound is exclusive, so we need a key which is strictly smaller + if (iter_->Valid() && cmp_->Compare(iter_->key(), *end_) == 0) { + iter_->Prev(); + } + + UpdateAndEnforceLowerBound(); + return; + } + + iter_->SeekForPrev(target); + UpdateAndEnforceLowerBound(); + } + + void Next() override { + assert(valid_); + iter_->Next(); + UpdateAndEnforceUpperBound(); + } + + bool NextAndGetResult(IterateResult* result) override { + assert(valid_); + assert(result); + + IterateResult res; + valid_ = iter_->NextAndGetResult(&res); + + if (!valid_) { + return false; + } + + if (end_) { + EnforceUpperBoundImpl(res.bound_check_result); + + if (!valid_) { + return false; + } + } + + res.bound_check_result = IterBoundCheck::kInbound; + *result = res; + + return true; + } + + void Prev() override { + assert(valid_); + iter_->Prev(); + UpdateAndEnforceLowerBound(); + } + + 
Slice key() const override { + assert(valid_); + return iter_->key(); + } + + Slice user_key() const override { + assert(valid_); + return iter_->user_key(); + } + + Slice value() const override { + assert(valid_); + return iter_->value(); + } + + Status status() const override { return iter_->status(); } + + bool PrepareValue() override { + assert(valid_); + + if (iter_->PrepareValue()) { + return true; + } + + assert(!iter_->Valid()); + valid_ = false; + return false; + } + + bool MayBeOutOfLowerBound() override { + assert(valid_); + return false; + } + + IterBoundCheck UpperBoundCheckResult() override { + assert(valid_); + return IterBoundCheck::kInbound; + } + + void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { + iter_->SetPinnedItersMgr(pinned_iters_mgr); + } + + bool IsKeyPinned() const override { + assert(valid_); + return iter_->IsKeyPinned(); + } + + bool IsValuePinned() const override { + assert(valid_); + return iter_->IsValuePinned(); + } + + Status GetProperty(std::string prop_name, std::string* prop) override { + return iter_->GetProperty(prop_name, prop); + } + + private: + void UpdateValid() { + assert(!iter_->Valid() || iter_->status().ok()); + + valid_ = iter_->Valid(); + } + + void EnforceUpperBoundImpl(IterBoundCheck bound_check_result) { + if (bound_check_result == IterBoundCheck::kInbound) { + return; + } + + if (bound_check_result == IterBoundCheck::kOutOfBound) { + valid_ = false; + return; + } + + assert(bound_check_result == IterBoundCheck::kUnknown); + + if (cmp_->Compare(key(), *end_) >= 0) { + valid_ = false; + } + } + + void EnforceUpperBound() { + if (!valid_) { + return; + } + + if (!end_) { + return; + } + + EnforceUpperBoundImpl(iter_->UpperBoundCheckResult()); + } + + void EnforceLowerBound() { + if (!valid_) { + return; + } + + if (!start_) { + return; + } + + if (!iter_->MayBeOutOfLowerBound()) { + return; + } + + if (cmp_->Compare(key(), *start_) < 0) { + valid_ = false; + } + } + + void AssertBounds() { + assert(!valid_ || !start_ || cmp_->Compare(key(), *start_) >= 0); + assert(!valid_ || !end_ || cmp_->Compare(key(), *end_) < 0); + } + + void UpdateAndEnforceBounds() { + UpdateValid(); + EnforceUpperBound(); + EnforceLowerBound(); + AssertBounds(); + } + + void UpdateAndEnforceUpperBound() { + UpdateValid(); + EnforceUpperBound(); + AssertBounds(); + } + + void UpdateAndEnforceLowerBound() { + UpdateValid(); + EnforceLowerBound(); + AssertBounds(); + } + + InternalIterator* iter_; + const Slice* start_; + const Slice* end_; + const Comparator* cmp_; + bool valid_; +}; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,258 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "db/compaction/clipping_iterator.h" + +#include +#include +#include +#include + +#include "db/dbformat.h" +#include "rocksdb/comparator.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" +#include "util/vector_iterator.h" + +namespace ROCKSDB_NAMESPACE { + +// A vector iterator which does its own bounds checking. This is for testing the +// optimizations in the clipping iterator where we bypass the bounds checking if +// the input iterator has already performed it. +class BoundsCheckingVectorIterator : public VectorIterator { + public: + BoundsCheckingVectorIterator(const std::vector& keys, + const std::vector& values, + const Slice* start, const Slice* end, + const Comparator* cmp) + : VectorIterator(keys, values, cmp), start_(start), end_(end), cmp_(cmp) { + assert(cmp_); + } + + bool NextAndGetResult(IterateResult* result) override { + assert(Valid()); + assert(result); + + Next(); + + if (!Valid()) { + return false; + } + + result->key = key(); + result->bound_check_result = UpperBoundCheckResult(); + result->value_prepared = true; + + return true; + } + + bool MayBeOutOfLowerBound() override { + assert(Valid()); + + if (!start_) { + return false; + } + + return cmp_->Compare(key(), *start_) < 0; + } + + IterBoundCheck UpperBoundCheckResult() override { + assert(Valid()); + + if (!end_) { + return IterBoundCheck::kInbound; + } + + return cmp_->Compare(key(), *end_) >= 0 ? IterBoundCheck::kOutOfBound + : IterBoundCheck::kInbound; + } + + private: + const Slice* start_; + const Slice* end_; + const Comparator* cmp_; +}; + +class ClippingIteratorTest + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +TEST_P(ClippingIteratorTest, Clip) { + const std::vector keys{"key0", "key1", "key2", "key3", "key4", + "key5", "key6", "key7", "key8", "key9"}; + const std::vector values{ + "unused0", "value1", "value2", "value3", "unused4", + "unused5", "unused6", "unused7", "unused8", "unused9"}; + + assert(keys.size() == values.size()); + + // Note: the input always contains key1, key2, and key3; however, the clipping + // window is based on the test parameters: its left edge is a value in the + // range [0, 4], and its size is a value in the range [0, 5] + const std::vector input_keys{keys[1], keys[2], keys[3]}; + const std::vector input_values{values[1], values[2], values[3]}; + + const bool use_bounds_checking_vec_it = std::get<0>(GetParam()); + + const size_t clip_start_idx = std::get<1>(GetParam()); + const size_t clip_window_size = std::get<2>(GetParam()); + const size_t clip_end_idx = clip_start_idx + clip_window_size; + + const Slice start(keys[clip_start_idx]); + const Slice end(keys[clip_end_idx]); + + std::unique_ptr input( + use_bounds_checking_vec_it + ? new BoundsCheckingVectorIterator(input_keys, input_values, &start, + &end, BytewiseComparator()) + : new VectorIterator(input_keys, input_values, BytewiseComparator())); + + ClippingIterator clip(input.get(), &start, &end, BytewiseComparator()); + + // The range the clipping iterator should return values from. 
This is + // essentially the intersection of the input range [1, 4) and the clipping + // window [clip_start_idx, clip_end_idx) + const size_t data_start_idx = + std::max(clip_start_idx, static_cast(1)); + const size_t data_end_idx = std::min(clip_end_idx, static_cast(4)); + + // Range is empty; all Seeks should fail + if (data_start_idx >= data_end_idx) { + clip.SeekToFirst(); + ASSERT_FALSE(clip.Valid()); + + clip.SeekToLast(); + ASSERT_FALSE(clip.Valid()); + + for (size_t i = 0; i < keys.size(); ++i) { + clip.Seek(keys[i]); + ASSERT_FALSE(clip.Valid()); + + clip.SeekForPrev(keys[i]); + ASSERT_FALSE(clip.Valid()); + } + + return; + } + + // Range is non-empty; call SeekToFirst and iterate forward + clip.SeekToFirst(); + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[data_start_idx]); + ASSERT_EQ(clip.value(), values[data_start_idx]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + + for (size_t i = data_start_idx + 1; i < data_end_idx; ++i) { + clip.Next(); + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[i]); + ASSERT_EQ(clip.value(), values[i]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + } + + clip.Next(); + ASSERT_FALSE(clip.Valid()); + + // Do it again using NextAndGetResult + clip.SeekToFirst(); + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[data_start_idx]); + ASSERT_EQ(clip.value(), values[data_start_idx]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + + for (size_t i = data_start_idx + 1; i < data_end_idx; ++i) { + IterateResult result; + ASSERT_TRUE(clip.NextAndGetResult(&result)); + ASSERT_EQ(result.key, keys[i]); + ASSERT_EQ(result.bound_check_result, IterBoundCheck::kInbound); + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[i]); + ASSERT_EQ(clip.value(), values[i]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + } + + IterateResult result; + ASSERT_FALSE(clip.NextAndGetResult(&result)); + ASSERT_FALSE(clip.Valid()); + + // Call SeekToLast and iterate backward + clip.SeekToLast(); + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[data_end_idx - 1]); + ASSERT_EQ(clip.value(), values[data_end_idx - 1]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + + for (size_t i = data_end_idx - 2; i >= data_start_idx; --i) { + clip.Prev(); + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[i]); + ASSERT_EQ(clip.value(), values[i]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + } + + clip.Prev(); + ASSERT_FALSE(clip.Valid()); + + // Call Seek/SeekForPrev for all keys; Seek should return the smallest key + // which is >= the target; SeekForPrev should return the largest key which is + // <= the target + for (size_t i = 0; i < keys.size(); ++i) { + clip.Seek(keys[i]); + + if (i < data_start_idx) { + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[data_start_idx]); + ASSERT_EQ(clip.value(), values[data_start_idx]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + } else if (i < data_end_idx) { + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[i]); + ASSERT_EQ(clip.value(), values[i]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + 
ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + } else { + ASSERT_FALSE(clip.Valid()); + } + + clip.SeekForPrev(keys[i]); + + if (i < data_start_idx) { + ASSERT_FALSE(clip.Valid()); + } else if (i < data_end_idx) { + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[i]); + ASSERT_EQ(clip.value(), values[i]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + } else { + ASSERT_TRUE(clip.Valid()); + ASSERT_EQ(clip.key(), keys[data_end_idx - 1]); + ASSERT_EQ(clip.value(), values[data_end_idx - 1]); + ASSERT_FALSE(clip.MayBeOutOfLowerBound()); + ASSERT_EQ(clip.UpperBoundCheckResult(), IterBoundCheck::kInbound); + } + } +} + +INSTANTIATE_TEST_CASE_P( + ClippingIteratorTest, ClippingIteratorTest, + ::testing::Combine( + ::testing::Bool(), + ::testing::Range(static_cast(0), static_cast(5)), + ::testing::Range(static_cast(0), static_cast(6)))); + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction.cc 2025-05-19 16:14:27.000000000 +0000 @@ -7,12 +7,14 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "db/compaction/compaction.h" + #include #include #include "db/column_family.h" -#include "db/compaction/compaction.h" #include "rocksdb/compaction_filter.h" +#include "rocksdb/sst_partitioner.h" #include "test_util/sync_point.h" #include "util/string_util.h" @@ -23,7 +25,7 @@ int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a, const InternalKey& b) { - auto c = user_cmp->Compare(a.user_key(), b.user_key()); + auto c = user_cmp->CompareWithoutTimestamp(a.user_key(), b.user_key()); if (c != 0) { return c; } @@ -202,26 +204,24 @@ return num_files_in_compaction == total_num_files; } -Compaction::Compaction(VersionStorageInfo* vstorage, - const ImmutableCFOptions& _immutable_cf_options, - const MutableCFOptions& _mutable_cf_options, - std::vector _inputs, - int _output_level, uint64_t _target_file_size, - uint64_t _max_compaction_bytes, uint32_t _output_path_id, - CompressionType _compression, - CompressionOptions _compression_opts, - uint32_t _max_subcompactions, - std::vector _grandparents, - bool _manual_compaction, double _score, - bool _deletion_compaction, - CompactionReason _compaction_reason) +Compaction::Compaction( + VersionStorageInfo* vstorage, const ImmutableOptions& _immutable_options, + const MutableCFOptions& _mutable_cf_options, + const MutableDBOptions& _mutable_db_options, + std::vector _inputs, int _output_level, + uint64_t _target_file_size, uint64_t _max_compaction_bytes, + uint32_t _output_path_id, CompressionType _compression, + CompressionOptions _compression_opts, Temperature _output_temperature, + uint32_t _max_subcompactions, std::vector _grandparents, + bool _manual_compaction, double _score, bool _deletion_compaction, + CompactionReason _compaction_reason) : input_vstorage_(vstorage), start_level_(_inputs[0].level), output_level_(_output_level), max_output_file_size_(_target_file_size), max_compaction_bytes_(_max_compaction_bytes), 
max_subcompactions_(_max_subcompactions), - immutable_cf_options_(_immutable_cf_options), + immutable_options_(_immutable_options), mutable_cf_options_(_mutable_cf_options), input_version_(nullptr), number_levels_(vstorage->num_levels()), @@ -229,6 +229,7 @@ output_path_id_(_output_path_id), output_compression_(_compression), output_compression_opts_(_compression_opts), + output_temperature_(_output_temperature), deletion_compaction_(_deletion_compaction), inputs_(PopulateWithAtomicBoundaries(vstorage, std::move(_inputs))), grandparents_(std::move(_grandparents)), @@ -237,19 +238,14 @@ is_full_compaction_(IsFullCompaction(vstorage, inputs_)), is_manual_compaction_(_manual_compaction), is_trivial_move_(false), - compaction_reason_(_compaction_reason) { + compaction_reason_(_compaction_reason), + notify_on_compaction_completion_(false) { MarkFilesBeingCompacted(true); if (is_manual_compaction_) { compaction_reason_ = CompactionReason::kManualCompaction; } if (max_subcompactions_ == 0) { - max_subcompactions_ = immutable_cf_options_.max_subcompactions; - } - if (!bottommost_level_) { - // Currently we only enable dictionary compression during compaction to the - // bottommost level. - output_compression_opts_.max_dict_bytes = 0; - output_compression_opts_.zstd_max_train_bytes = 0; + max_subcompactions_ = _mutable_db_options.max_subcompactions; } #ifndef NDEBUG @@ -281,7 +277,7 @@ bool Compaction::InputCompressionMatchesOutput() const { int base_level = input_vstorage_->base_level(); - bool matches = (GetCompressionType(immutable_cf_options_, input_vstorage_, + bool matches = (GetCompressionType(immutable_options_, input_vstorage_, mutable_cf_options_, start_level_, base_level) == output_compression_); if (matches) { @@ -306,13 +302,19 @@ } if (is_manual_compaction_ && - (immutable_cf_options_.compaction_filter != nullptr || - immutable_cf_options_.compaction_filter_factory != nullptr)) { + (immutable_options_.compaction_filter != nullptr || + immutable_options_.compaction_filter_factory != nullptr)) { // This is a manual compaction and we have a compaction filter that should // be executed, we cannot do a trivial move return false; } + if (start_level_ == output_level_) { + // It doesn't make sense if compaction picker picks files just to trivial + // move to the same level. + return false; + } + // Used in universal compaction, where trivial move can be done if the // input files are non overlapping if ((mutable_cf_options_.compaction_options_universal.allow_trivial_move) && @@ -328,6 +330,8 @@ // assert inputs_.size() == 1 + std::unique_ptr partitioner = CreateSstPartitioner(); + for (const auto& file : inputs_.front().files) { std::vector file_grand_parents; if (output_level_ + 1 >= number_levels_) { @@ -340,6 +344,13 @@ if (compaction_size > max_compaction_bytes_) { return false; } + + if (partitioner.get() != nullptr) { + if (!partitioner->CanDoTrivialMove(file->smallest.user_key(), + file->largest.user_key())) { + return false; + } + } } return true; @@ -371,7 +382,13 @@ auto* f = files[level_ptrs->at(lvl)]; if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) { // We've advanced far enough - if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) { + // In the presence of user-defined timestamp, we may need to handle + // the case in which f->smallest.user_key() (including ts) has the + // same user key, but the ts part is smaller. If so, + // Compare(user_key, f->smallest.user_key()) returns -1. + // That's why we need CompareWithoutTimestamp(). 
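The switch from Compare() to CompareWithoutTimestamp() in sstableKeyCompare matters once user-defined timestamps are enabled: two boundary keys that differ only in their timestamp suffix must still be treated as the same user key. A rough sketch of the convention assumed here, namely a fixed-size timestamp appended to the raw user key (the helper is illustrative, not the library's implementation):

#include <cstring>
#include <string>

// Illustrative only: a timestamped user key is <raw key><fixed-size ts>;
// "without timestamp" comparisons simply ignore the trailing ts bytes.
bool EqualWithoutTimestamp(const std::string& a, const std::string& b,
                           size_t ts_size) {
  if (a.size() < ts_size || b.size() < ts_size) return false;
  const size_t a_len = a.size() - ts_size;
  const size_t b_len = b.size() - ts_size;
  return a_len == b_len && std::memcmp(a.data(), b.data(), a_len) == 0;
}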
+ if (user_cmp->CompareWithoutTimestamp(user_key, + f->smallest.user_key()) >= 0) { // Key falls in this file's range, so it may // exist beyond output level return false; @@ -500,14 +517,14 @@ } if (max_output_file_size_ != port::kMaxUint64 && - (immutable_cf_options_.compaction_style == kCompactionStyleLevel || + (immutable_options_.compaction_style == kCompactionStyleLevel || output_level() > 0)) { preallocation_size = std::min(max_output_file_size_, preallocation_size); } // Over-estimate slightly so we don't end up just barely crossing // the threshold - // No point to prellocate more than 1GB. + // No point to preallocate more than 1GB. return std::min(uint64_t{1073741824}, preallocation_size + (preallocation_size / 10)); } @@ -517,14 +534,35 @@ return nullptr; } + if (!cfd_->ioptions() + ->compaction_filter_factory->ShouldFilterTableFileCreation( + TableFileCreationReason::kCompaction)) { + return nullptr; + } + CompactionFilter::Context context; context.is_full_compaction = is_full_compaction_; context.is_manual_compaction = is_manual_compaction_; context.column_family_id = cfd_->GetID(); + context.reason = TableFileCreationReason::kCompaction; return cfd_->ioptions()->compaction_filter_factory->CreateCompactionFilter( context); } +std::unique_ptr Compaction::CreateSstPartitioner() const { + if (!immutable_options_.sst_partitioner_factory) { + return nullptr; + } + + SstPartitioner::Context context; + context.is_full_compaction = is_full_compaction_; + context.is_manual_compaction = is_manual_compaction_; + context.output_level = output_level_; + context.smallest_user_key = smallest_user_key_; + context.largest_user_key = largest_user_key_; + return immutable_options_.sst_partitioner_factory->CreatePartitioner(context); +} + bool Compaction::IsOutputLevelEmpty() const { return inputs_.back().level != output_level_ || inputs_.back().empty(); } @@ -533,6 +571,14 @@ if (max_subcompactions_ <= 1 || cfd_ == nullptr) { return false; } + + // Note: the subcompaction boundary picking logic does not currently guarantee + // that all user keys that differ only by timestamp get processed by the same + // subcompaction. 
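CreateSstPartitioner() exposes the user-supplied sst_partitioner_factory to the compaction path, and the IsTrivialMove() hunk above consults CanDoTrivialMove() so that a file is never trivially moved across a partition boundary it should have been split on. Assuming the fixed-prefix factory shipped in RocksDB's public API, enabling a partitioner looks roughly like this:

#include "rocksdb/options.h"
#include "rocksdb/sst_partitioner.h"

// Illustrative only: cut SST files at 4-byte key-prefix boundaries; with the
// change above, compaction also refuses trivial moves that would cross them.
void EnablePrefixPartitioner(rocksdb::Options* options) {
  options->sst_partitioner_factory =
      rocksdb::NewSstPartitionerFixedPrefixFactory(/*prefix_len=*/4);
}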
+ if (cfd_->user_comparator()->timestamp_size() > 0) { + return false; + } + if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) { return (start_level_ == 0 || is_manual_compaction_) && output_level_ > 0 && !IsOutputLevelEmpty(); @@ -543,10 +589,42 @@ } } -uint64_t Compaction::MinInputFileOldestAncesterTime() const { +bool Compaction::DoesInputReferenceBlobFiles() const { + assert(input_version_); + + const VersionStorageInfo* storage_info = input_version_->storage_info(); + assert(storage_info); + + if (storage_info->GetBlobFiles().empty()) { + return false; + } + + for (size_t i = 0; i < inputs_.size(); ++i) { + for (const FileMetaData* meta : inputs_[i].files) { + assert(meta); + + if (meta->oldest_blob_file_number != kInvalidBlobFileNumber) { + return true; + } + } + } + + return false; +} + +uint64_t Compaction::MinInputFileOldestAncesterTime( + const InternalKey* start, const InternalKey* end) const { uint64_t min_oldest_ancester_time = port::kMaxUint64; + const InternalKeyComparator& icmp = + column_family_data()->internal_comparator(); for (const auto& level_files : inputs_) { for (const auto& file : level_files.files) { + if (start != nullptr && icmp.Compare(file->largest, *start) < 0) { + continue; + } + if (end != nullptr && icmp.Compare(file->smallest, *end) > 0) { + continue; + } uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime(); if (oldest_ancester_time != 0) { min_oldest_ancester_time = diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction.h 2025-05-19 16:14:27.000000000 +0000 @@ -11,6 +11,7 @@ #include "db/version_set.h" #include "memory/arena.h" #include "options/cf_options.h" +#include "rocksdb/sst_partitioner.h" #include "util/autovector.h" namespace ROCKSDB_NAMESPACE { @@ -69,12 +70,14 @@ class Compaction { public: Compaction(VersionStorageInfo* input_version, - const ImmutableCFOptions& immutable_cf_options, + const ImmutableOptions& immutable_options, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, std::vector inputs, int output_level, uint64_t target_file_size, uint64_t max_compaction_bytes, uint32_t output_path_id, CompressionType compression, - CompressionOptions compression_opts, uint32_t max_subcompactions, + CompressionOptions compression_opts, + Temperature output_temperature, uint32_t max_subcompactions, std::vector grandparents, bool manual_compaction = false, double score = -1, bool deletion_compaction = false, @@ -160,7 +163,7 @@ CompressionType output_compression() const { return output_compression_; } // What compression options for output - CompressionOptions output_compression_opts() const { + const CompressionOptions& output_compression_opts() const { return output_compression_opts_; } @@ -221,10 +224,10 @@ // How many total levels are there? 
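MinInputFileOldestAncesterTime() now accepts the sub-compaction boundaries so that input files lying entirely outside [start, end] cannot drag the minimum down. The overlap filter in isolation, with simplified key types (illustrative sketch only):

#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

// Illustrative only: minimum recorded time over files whose key range
// [smallest, largest] overlaps the sub-compaction range [start, end];
// a recorded time of 0 means "unknown" and is skipped, as in the patch.
struct FileInfo {
  int smallest;
  int largest;
  uint64_t oldest_ancester_time;  // spelling follows the RocksDB field
};

uint64_t MinAncesterTime(const std::vector<FileInfo>& files, int start,
                         int end) {
  uint64_t min_time = std::numeric_limits<uint64_t>::max();
  for (const FileInfo& f : files) {
    if (f.largest < start || f.smallest > end) continue;  // no overlap
    if (f.oldest_ancester_time != 0) {
      min_time = std::min(min_time, f.oldest_ancester_time);
    }
  }
  return min_time;
}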
int number_levels() const { return number_levels_; } - // Return the ImmutableCFOptions that should be used throughout the compaction + // Return the ImmutableOptions that should be used throughout the compaction // procedure - const ImmutableCFOptions* immutable_cf_options() const { - return &immutable_cf_options_; + const ImmutableOptions* immutable_options() const { + return &immutable_options_; } // Return the MutableCFOptions that should be used throughout the compaction @@ -255,12 +258,20 @@ // Create a CompactionFilter from compaction_filter_factory std::unique_ptr CreateCompactionFilter() const; + // Create a SstPartitioner from sst_partitioner_factory + std::unique_ptr CreateSstPartitioner() const; + // Is the input level corresponding to output_level_ empty? bool IsOutputLevelEmpty() const; // Should this compaction be broken up into smaller ones run in parallel? bool ShouldFormSubcompactions() const; + // Returns true iff at least one input file references a blob file. + // + // PRE: input version has been set. + bool DoesInputReferenceBlobFiles() const; + // test function to validate the functionality of IsBottommostLevel() // function -- determines if compaction with inputs and storage is bottommost static bool TEST_IsBottommostLevel( @@ -289,9 +300,24 @@ uint64_t max_compaction_bytes() const { return max_compaction_bytes_; } + Temperature output_temperature() const { return output_temperature_; } + uint32_t max_subcompactions() const { return max_subcompactions_; } - uint64_t MinInputFileOldestAncesterTime() const; + // start and end are sub compact range. Null if no boundary. + // This is used to filter out some input files' ancester's time range. + uint64_t MinInputFileOldestAncesterTime(const InternalKey* start, + const InternalKey* end) const; + + // Called by DBImpl::NotifyOnCompactionCompleted to make sure number of + // compaction begin and compaction completion callbacks match. + void SetNotifyOnCompactionCompleted() { + notify_on_compaction_completion_ = true; + } + + bool ShouldNotifyOnCompactionCompleted() const { + return notify_on_compaction_completion_; + } private: // mark (or clear) all files that are being compacted @@ -325,7 +351,7 @@ uint64_t max_output_file_size_; uint64_t max_compaction_bytes_; uint32_t max_subcompactions_; - const ImmutableCFOptions immutable_cf_options_; + const ImmutableOptions immutable_options_; const MutableCFOptions mutable_cf_options_; Version* input_version_; VersionEdit edit_; @@ -336,7 +362,8 @@ const uint32_t output_path_id_; CompressionType output_compression_; CompressionOptions output_compression_opts_; - // If true, then the comaction can be done by simply deleting input files. + Temperature output_temperature_; + // If true, then the compaction can be done by simply deleting input files. const bool deletion_compaction_; // Compaction input files organized by level. Constant after construction @@ -376,6 +403,10 @@ // Reason for compaction CompactionReason compaction_reason_; + + // Notify on compaction completion only if listener was notified on compaction + // begin. + bool notify_on_compaction_completion_; }; // Return sum of sizes of all files in `files`. 
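DoesInputReferenceBlobFiles(), declared above, drives decisions such as whether the compaction input needs blob access at all: each input file's metadata records the oldest blob file it references, with a sentinel meaning "none". A reduced sketch of that scan with simplified types (in RocksDB the sentinel kInvalidBlobFileNumber is 0, which is assumed here):

#include <cstdint>
#include <vector>

// Illustrative only: an input set references blob files iff any file's
// oldest-blob-file number differs from the invalid sentinel.
constexpr uint64_t kInvalidBlobFileNumber = 0;  // assumed sentinel value

bool ReferencesBlobFiles(const std::vector<uint64_t>& oldest_blob_file_numbers) {
  for (uint64_t n : oldest_blob_file_numbers) {
    if (n != kInvalidBlobFileNumber) return true;
  }
  return false;
}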
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h 2025-05-19 16:14:27.000000000 +0000 @@ -5,8 +5,12 @@ #pragma once +#include + #include "rocksdb/rocksdb_namespace.h" +namespace ROCKSDB_NAMESPACE { + struct CompactionIterationStats { // Compaction statistics @@ -34,4 +38,12 @@ // Single-Delete diagnostics for exceptional situations uint64_t num_single_del_fallthru = 0; uint64_t num_single_del_mismatch = 0; + + // Blob related statistics + uint64_t num_blobs_read = 0; + uint64_t total_blob_bytes_read = 0; + uint64_t num_blobs_relocated = 0; + uint64_t total_blob_bytes_relocated = 0; }; + +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.cc 2025-05-19 16:14:27.000000000 +0000 @@ -3,53 +3,48 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#include - #include "db/compaction/compaction_iterator.h" + +#include +#include + +#include "db/blob/blob_fetcher.h" +#include "db/blob/blob_file_builder.h" +#include "db/blob/blob_index.h" +#include "db/blob/prefetch_buffer_collection.h" #include "db/snapshot_checker.h" +#include "logging/logging.h" #include "port/likely.h" #include "rocksdb/listener.h" #include "table/internal_iterator.h" #include "test_util/sync_point.h" -#define DEFINITELY_IN_SNAPSHOT(seq, snapshot) \ - ((seq) <= (snapshot) && \ - (snapshot_checker_ == nullptr || \ - LIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == \ - SnapshotCheckerResult::kInSnapshot))) - -#define DEFINITELY_NOT_IN_SNAPSHOT(seq, snapshot) \ - ((seq) > (snapshot) || \ - (snapshot_checker_ != nullptr && \ - UNLIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == \ - SnapshotCheckerResult::kNotInSnapshot))) - -#define IN_EARLIEST_SNAPSHOT(seq) \ - ((seq) <= earliest_snapshot_ && \ - (snapshot_checker_ == nullptr || LIKELY(IsInEarliestSnapshot(seq)))) - namespace ROCKSDB_NAMESPACE { - CompactionIterator::CompactionIterator( InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper, SequenceNumber last_sequence, std::vector* snapshots, SequenceNumber earliest_write_conflict_snapshot, const SnapshotChecker* snapshot_checker, Env* env, bool report_detailed_time, bool expect_valid_internal_key, - CompactionRangeDelAggregator* range_del_agg, const Compaction* compaction, - const CompactionFilter* compaction_filter, + CompactionRangeDelAggregator* range_del_agg, + BlobFileBuilder* blob_file_builder, bool allow_data_in_errors, + const Compaction* compaction, const CompactionFilter* compaction_filter, const std::atomic* shutting_down, const SequenceNumber preserve_deletes_seqnum, - const std::atomic* manual_compaction_paused, - const std::shared_ptr info_log) + const std::atomic* manual_compaction_paused, + const std::atomic* manual_compaction_canceled, + const std::shared_ptr info_log, + const std::string* full_history_ts_low) : 
CompactionIterator( input, cmp, merge_helper, last_sequence, snapshots, earliest_write_conflict_snapshot, snapshot_checker, env, report_detailed_time, expect_valid_internal_key, range_del_agg, + blob_file_builder, allow_data_in_errors, std::unique_ptr( - compaction ? new CompactionProxy(compaction) : nullptr), + compaction ? new RealCompaction(compaction) : nullptr), compaction_filter, shutting_down, preserve_deletes_seqnum, - manual_compaction_paused, info_log) {} + manual_compaction_paused, manual_compaction_canceled, info_log, + full_history_ts_low) {} CompactionIterator::CompactionIterator( InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper, @@ -58,36 +53,54 @@ const SnapshotChecker* snapshot_checker, Env* env, bool report_detailed_time, bool expect_valid_internal_key, CompactionRangeDelAggregator* range_del_agg, + BlobFileBuilder* blob_file_builder, bool allow_data_in_errors, std::unique_ptr compaction, const CompactionFilter* compaction_filter, const std::atomic* shutting_down, const SequenceNumber preserve_deletes_seqnum, - const std::atomic* manual_compaction_paused, - const std::shared_ptr info_log) - : input_(input), + const std::atomic* manual_compaction_paused, + const std::atomic* manual_compaction_canceled, + const std::shared_ptr info_log, + const std::string* full_history_ts_low) + : input_(input, cmp, + !compaction || compaction->DoesInputReferenceBlobFiles()), cmp_(cmp), merge_helper_(merge_helper), snapshots_(snapshots), earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot), snapshot_checker_(snapshot_checker), env_(env), + clock_(env_->GetSystemClock().get()), report_detailed_time_(report_detailed_time), expect_valid_internal_key_(expect_valid_internal_key), range_del_agg_(range_del_agg), + blob_file_builder_(blob_file_builder), compaction_(std::move(compaction)), compaction_filter_(compaction_filter), shutting_down_(shutting_down), manual_compaction_paused_(manual_compaction_paused), + manual_compaction_canceled_(manual_compaction_canceled), preserve_deletes_seqnum_(preserve_deletes_seqnum), + info_log_(info_log), + allow_data_in_errors_(allow_data_in_errors), + timestamp_size_(cmp_ ? cmp_->timestamp_size() : 0), + full_history_ts_low_(full_history_ts_low), current_user_key_sequence_(0), current_user_key_snapshot_(0), merge_out_iter_(merge_helper_), + blob_garbage_collection_cutoff_file_number_( + ComputeBlobGarbageCollectionCutoffFileNumber(compaction_.get())), + blob_fetcher_(CreateBlobFetcherIfNeeded(compaction_.get())), + prefetch_buffers_( + CreatePrefetchBufferCollectionIfNeeded(compaction_.get())), current_key_committed_(false), - info_log_(info_log) { - assert(compaction_filter_ == nullptr || compaction_ != nullptr); + cmp_with_history_ts_low_(0), + level_(compaction_ == nullptr ? 0 : compaction_->level()) { assert(snapshots_ != nullptr); - bottommost_level_ = - compaction_ == nullptr ? false : compaction_->bottommost_level(); + bottommost_level_ = compaction_ == nullptr + ? 
false + : compaction_->bottommost_level() && + !compaction_->allow_ingest_behind(); if (compaction_ != nullptr) { level_ptrs_ = std::vector(compaction_->number_levels(), 0); } @@ -108,14 +121,16 @@ for (size_t i = 1; i < snapshots_->size(); ++i) { assert(snapshots_->at(i - 1) < snapshots_->at(i)); } + assert(timestamp_size_ == 0 || !full_history_ts_low_ || + timestamp_size_ == full_history_ts_low_->size()); #endif - input_->SetPinnedItersMgr(&pinned_iters_mgr_); + input_.SetPinnedItersMgr(&pinned_iters_mgr_); TEST_SYNC_POINT_CALLBACK("CompactionIterator:AfterInit", compaction_.get()); } CompactionIterator::~CompactionIterator() { - // input_ Iteartor lifetime is longer than pinned_iters_mgr_ lifetime - input_->SetPinnedItersMgr(nullptr); + // input_ Iterator lifetime is longer than pinned_iters_mgr_ lifetime + input_.SetPinnedItersMgr(nullptr); } void CompactionIterator::ResetRecordCounts() { @@ -142,14 +157,13 @@ if (merge_out_iter_.Valid()) { key_ = merge_out_iter_.key(); value_ = merge_out_iter_.value(); - bool valid_key __attribute__((__unused__)); - valid_key = ParseInternalKey(key_, &ikey_); + Status s = ParseInternalKey(key_, &ikey_, allow_data_in_errors_); // MergeUntil stops when it encounters a corrupt key and does not // include them in the result, so we expect the keys here to be valid. - assert(valid_key); - if (!valid_key) { - ROCKS_LOG_FATAL(info_log_, "Invalid key (%s) in compaction", - key_.ToString(true).c_str()); + assert(s.ok()); + if (!s.ok()) { + ROCKS_LOG_FATAL(info_log_, "Invalid key in compaction. %s", + s.getState()); } // Keep current_key_ in sync. @@ -169,7 +183,7 @@ // Only advance the input iterator if there is no merge output and the // iterator is not already at the next record. if (!at_next_) { - input_->Next(); + AdvanceInputIter(); } NextFromInput(); } @@ -182,90 +196,191 @@ PrepareOutput(); } -void CompactionIterator::InvokeFilterIfNeeded(bool* need_skip, +bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip, Slice* skip_until) { - if (compaction_filter_ != nullptr && - (ikey_.type == kTypeValue || ikey_.type == kTypeBlobIndex)) { - // If the user has specified a compaction filter and the sequence - // number is greater than any external snapshot, then invoke the - // filter. If the return value of the compaction filter is true, - // replace the entry with a deletion marker. - CompactionFilter::Decision filter; - compaction_filter_value_.clear(); - compaction_filter_skip_until_.Clear(); - CompactionFilter::ValueType value_type = - ikey_.type == kTypeValue ? CompactionFilter::ValueType::kValue - : CompactionFilter::ValueType::kBlobIndex; - // Hack: pass internal key to BlobIndexCompactionFilter since it needs - // to get sequence number. - Slice& filter_key = ikey_.type == kTypeValue ? ikey_.user_key : key_; - { - StopWatchNano timer(env_, report_detailed_time_); + if (!compaction_filter_ || + (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex)) { + return true; + } + bool error = false; + // If the user has specified a compaction filter and the sequence + // number is greater than any external snapshot, then invoke the + // filter. If the return value of the compaction filter is true, + // replace the entry with a deletion marker. + CompactionFilter::Decision filter = CompactionFilter::Decision::kUndetermined; + compaction_filter_value_.clear(); + compaction_filter_skip_until_.Clear(); + CompactionFilter::ValueType value_type = + ikey_.type == kTypeValue ? 
CompactionFilter::ValueType::kValue + : CompactionFilter::ValueType::kBlobIndex; + // Hack: pass internal key to BlobIndexCompactionFilter since it needs + // to get sequence number. + assert(compaction_filter_); + Slice& filter_key = + (ikey_.type == kTypeValue || + !compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) + ? ikey_.user_key + : key_; + { + StopWatchNano timer(clock_, report_detailed_time_); + if (kTypeBlobIndex == ikey_.type) { + blob_value_.Reset(); + filter = compaction_filter_->FilterBlobByKey( + level_, filter_key, &compaction_filter_value_, + compaction_filter_skip_until_.rep()); + if (CompactionFilter::Decision::kUndetermined == filter && + !compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + if (compaction_ == nullptr) { + status_ = + Status::Corruption("Unexpected blob index outside of compaction"); + valid_ = false; + return false; + } + + // For integrated BlobDB impl, CompactionIterator reads blob value. + // For Stacked BlobDB impl, the corresponding CompactionFilter's + // FilterV2 method should read the blob value. + BlobIndex blob_index; + Status s = blob_index.DecodeFrom(value_); + if (!s.ok()) { + status_ = s; + valid_ = false; + return false; + } + + FilePrefetchBuffer* prefetch_buffer = + prefetch_buffers_ ? prefetch_buffers_->GetOrCreatePrefetchBuffer( + blob_index.file_number()) + : nullptr; + + uint64_t bytes_read = 0; + + assert(blob_fetcher_); + + s = blob_fetcher_->FetchBlob(ikey_.user_key, blob_index, + prefetch_buffer, &blob_value_, + &bytes_read); + if (!s.ok()) { + status_ = s; + valid_ = false; + return false; + } + + ++iter_stats_.num_blobs_read; + iter_stats_.total_blob_bytes_read += bytes_read; + + value_type = CompactionFilter::ValueType::kValue; + } + } + if (CompactionFilter::Decision::kUndetermined == filter) { filter = compaction_filter_->FilterV2( - compaction_->level(), filter_key, value_type, value_, - &compaction_filter_value_, compaction_filter_skip_until_.rep()); - iter_stats_.total_filter_time += - env_ != nullptr && report_detailed_time_ ? timer.ElapsedNanos() : 0; - } - - if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil && - cmp_->Compare(*compaction_filter_skip_until_.rep(), ikey_.user_key) <= - 0) { - // Can't skip to a key smaller than the current one. - // Keep the key as per FilterV2 documentation. - filter = CompactionFilter::Decision::kKeep; - } - - if (filter == CompactionFilter::Decision::kRemove) { - // convert the current key to a delete; key_ is pointing into - // current_key_ at this point, so updating current_key_ updates key() - ikey_.type = kTypeDeletion; - current_key_.UpdateInternalKey(ikey_.sequence, kTypeDeletion); - // no value associated with delete - value_.clear(); - iter_stats_.num_record_drop_user++; - } else if (filter == CompactionFilter::Decision::kChangeValue) { - value_ = compaction_filter_value_; - } else if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil) { - *need_skip = true; - compaction_filter_skip_until_.ConvertFromUserKey(kMaxSequenceNumber, - kValueTypeForSeek); - *skip_until = compaction_filter_skip_until_.Encode(); + level_, filter_key, value_type, + blob_value_.empty() ? value_ : blob_value_, &compaction_filter_value_, + compaction_filter_skip_until_.rep()); } + iter_stats_.total_filter_time += + env_ != nullptr && report_detailed_time_ ? timer.ElapsedNanos() : 0; + } + + if (CompactionFilter::Decision::kUndetermined == filter) { + // Should not reach here, since FilterV2 should never return kUndetermined. 
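From the application's point of view the contract is unchanged by this refactoring: a filter receives a plain value (the iterator now dereferences blob indexes itself for the integrated BlobDB) and returns a Decision. A minimal user-level filter against the public rocksdb::CompactionFilter API, dropping keys with a given prefix (the prefix and class name are illustrative):

#include <string>

#include "rocksdb/compaction_filter.h"
#include "rocksdb/slice.h"

// Illustrative only: remove every key beginning with "tmp:" during
// compaction; everything else is kept unchanged.
class DropTmpFilter : public rocksdb::CompactionFilter {
 public:
  Decision FilterV2(int /*level*/, const rocksdb::Slice& key,
                    ValueType /*value_type*/,
                    const rocksdb::Slice& /*existing_value*/,
                    std::string* /*new_value*/,
                    std::string* /*skip_until*/) const override {
    return key.starts_with("tmp:") ? Decision::kRemove : Decision::kKeep;
  }
  const char* Name() const override { return "DropTmpFilter"; }
};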
+ status_ = + Status::NotSupported("FilterV2() should never return kUndetermined"); + valid_ = false; + return false; + } + + if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil && + cmp_->Compare(*compaction_filter_skip_until_.rep(), ikey_.user_key) <= + 0) { + // Can't skip to a key smaller than the current one. + // Keep the key as per FilterV2 documentation. + filter = CompactionFilter::Decision::kKeep; } + + if (filter == CompactionFilter::Decision::kRemove) { + // convert the current key to a delete; key_ is pointing into + // current_key_ at this point, so updating current_key_ updates key() + ikey_.type = kTypeDeletion; + current_key_.UpdateInternalKey(ikey_.sequence, kTypeDeletion); + // no value associated with delete + value_.clear(); + iter_stats_.num_record_drop_user++; + } else if (filter == CompactionFilter::Decision::kChangeValue) { + if (ikey_.type == kTypeBlobIndex) { + // value transfer from blob file to inlined data + ikey_.type = kTypeValue; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + } + value_ = compaction_filter_value_; + } else if (filter == CompactionFilter::Decision::kRemoveAndSkipUntil) { + *need_skip = true; + compaction_filter_skip_until_.ConvertFromUserKey(kMaxSequenceNumber, + kValueTypeForSeek); + *skip_until = compaction_filter_skip_until_.Encode(); + } else if (filter == CompactionFilter::Decision::kChangeBlobIndex) { + // Only the StackableDB-based BlobDB impl's compaction filter should return + // kChangeBlobIndex. Decision about rewriting blob and changing blob index + // in the integrated BlobDB impl is made in subsequent call to + // PrepareOutput() and its callees. + if (!compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + status_ = Status::NotSupported( + "Only stacked BlobDB's internal compaction filter can return " + "kChangeBlobIndex."); + valid_ = false; + return false; + } + if (ikey_.type == kTypeValue) { + // value transfer from inlined data to blob file + ikey_.type = kTypeBlobIndex; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + } + value_ = compaction_filter_value_; + } else if (filter == CompactionFilter::Decision::kIOError) { + if (!compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + status_ = Status::NotSupported( + "CompactionFilter for integrated BlobDB should not return kIOError"); + valid_ = false; + return false; + } + status_ = Status::IOError("Failed to access blob during compaction filter"); + error = true; + } + return !error; } void CompactionIterator::NextFromInput() { at_next_ = false; valid_ = false; - while (!valid_ && input_->Valid() && !IsPausingManualCompaction() && + while (!valid_ && input_.Valid() && !IsPausingManualCompaction() && !IsShuttingDown()) { - key_ = input_->key(); - value_ = input_->value(); + key_ = input_.key(); + value_ = input_.value(); iter_stats_.num_input_records++; - if (!ParseInternalKey(key_, &ikey_)) { + Status pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_); + if (!pik_status.ok()) { + iter_stats_.num_input_corrupt_records++; + // If `expect_valid_internal_key_` is false, return the corrupted key // and let the caller decide what to do with it. - // TODO(noetzli): We should have a more elegant solution for this. 
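ParseInternalKey() now reports failures via Status (optionally embedding key bytes when allow_data_in_errors is set) instead of a bare bool. The format it parses is the usual RocksDB internal key: the user key followed by eight bytes packing a 56-bit sequence number with an 8-bit value type, roughly:

#include <cstdint>

// Illustrative only: the trailing 8 bytes of an internal key hold
// (sequence << 8) | type; sequence numbers are limited to 56 bits.
uint64_t PackSequenceAndType(uint64_t seq, uint8_t type) {
  return (seq << 8) | type;
}

void UnpackSequenceAndType(uint64_t packed, uint64_t* seq, uint8_t* type) {
  *seq = packed >> 8;
  *type = static_cast<uint8_t>(packed & 0xff);
}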
if (expect_valid_internal_key_) { - assert(!"Corrupted internal key not expected."); - status_ = Status::Corruption("Corrupted internal key not expected."); - break; + status_ = pik_status; + return; } key_ = current_key_.SetInternalKey(key_); has_current_user_key_ = false; current_user_key_sequence_ = kMaxSequenceNumber; current_user_key_snapshot_ = 0; - iter_stats_.num_input_corrupt_records++; valid_ = true; break; } TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_); // Update input statistics - if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion) { + if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion || + ikey_.type == kTypeDeletionWithTimestamp) { iter_stats_.num_input_deletion_records++; } iter_stats_.total_input_raw_key_bytes += key_.size(); @@ -278,25 +393,71 @@ // merge_helper_->compaction_filter_skip_until_. Slice skip_until; + bool user_key_equal_without_ts = false; + int cmp_ts = 0; + if (has_current_user_key_) { + user_key_equal_without_ts = + cmp_->EqualWithoutTimestamp(ikey_.user_key, current_user_key_); + // if timestamp_size_ > 0, then curr_ts_ has been initialized by a + // previous key. + cmp_ts = timestamp_size_ ? cmp_->CompareTimestamp( + ExtractTimestampFromUserKey( + ikey_.user_key, timestamp_size_), + curr_ts_) + : 0; + } + // Check whether the user key changed. After this if statement current_key_ // is a copy of the current input key (maybe converted to a delete by the // compaction filter). ikey_.user_key is pointing to the copy. - if (!has_current_user_key_ || - !cmp_->Equal(ikey_.user_key, current_user_key_)) { + if (!has_current_user_key_ || !user_key_equal_without_ts || cmp_ts != 0) { // First occurrence of this user key // Copy key for output key_ = current_key_.SetInternalKey(key_, &ikey_); + + int prev_cmp_with_ts_low = + !full_history_ts_low_ ? 0 + : curr_ts_.empty() + ? 0 + : cmp_->CompareTimestamp(curr_ts_, *full_history_ts_low_); + + // If timestamp_size_ > 0, then copy from ikey_ to curr_ts_ for the use + // in next iteration to compare with the timestamp of next key. + UpdateTimestampAndCompareWithFullHistoryLow(); + + // If + // (1) !has_current_user_key_, OR + // (2) timestamp is disabled, OR + // (3) all history will be preserved, OR + // (4) user key (excluding timestamp) is different from previous key, OR + // (5) timestamp is NO older than *full_history_ts_low_, OR + // (6) timestamp is the largest one older than full_history_ts_low_, + // then current_user_key_ must be treated as a different user key. + // This means, if a user key (excluding ts) is the same as the previous + // user key, and its ts is older than *full_history_ts_low_, then we + // consider this key for GC, e.g. it may be dropped if certain conditions + // match. + if (!has_current_user_key_ || !timestamp_size_ || !full_history_ts_low_ || + !user_key_equal_without_ts || cmp_with_history_ts_low_ >= 0 || + prev_cmp_with_ts_low >= 0) { + // Initialize for future comparison for rule (A) and etc. + current_user_key_sequence_ = kMaxSequenceNumber; + current_user_key_snapshot_ = 0; + has_current_user_key_ = true; + } current_user_key_ = ikey_.user_key; - has_current_user_key_ = true; + has_outputted_key_ = false; - current_user_key_sequence_ = kMaxSequenceNumber; - current_user_key_snapshot_ = 0; + + last_key_seq_zeroed_ = false; + current_key_committed_ = KeyCommitted(ikey_.sequence); // Apply the compaction filter to the first committed version of the user // key. 
- if (current_key_committed_) { - InvokeFilterIfNeeded(&need_skip, &skip_until); + if (current_key_committed_ && + !InvokeFilterIfNeeded(&need_skip, &skip_until)) { + break; } } else { // Update the current key to reflect the new sequence number/type without @@ -316,8 +477,9 @@ current_key_committed_ = KeyCommitted(ikey_.sequence); // Apply the compaction filter to the first committed version of the // user key. - if (current_key_committed_) { - InvokeFilterIfNeeded(&need_skip, &skip_until); + if (current_key_committed_ && + !InvokeFilterIfNeeded(&need_skip, &skip_until)) { + break; } } } @@ -331,8 +493,7 @@ // If there are no snapshots, then this kv affect visibility at tip. // Otherwise, search though all existing snapshots to find the earliest // snapshot that is affected by this kv. - SequenceNumber last_sequence __attribute__((__unused__)); - last_sequence = current_user_key_sequence_; + SequenceNumber last_sequence = current_user_key_sequence_; current_user_key_sequence_ = ikey_.sequence; SequenceNumber last_snapshot = current_user_key_snapshot_; SequenceNumber prev_snapshot = 0; // 0 means no previous snapshot @@ -347,20 +508,25 @@ // In the previous iteration we encountered a single delete that we could // not compact out. We will keep this Put, but can drop it's data. // (See Optimization 3, below.) - assert(ikey_.type == kTypeValue); - if (ikey_.type != kTypeValue) { + assert(ikey_.type == kTypeValue || ikey_.type == kTypeBlobIndex); + if (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex) { ROCKS_LOG_FATAL(info_log_, "Unexpected key type %d for compaction output", ikey_.type); } - assert(current_user_key_snapshot_ == last_snapshot); - if (current_user_key_snapshot_ != last_snapshot) { + assert(current_user_key_snapshot_ >= last_snapshot); + if (current_user_key_snapshot_ < last_snapshot) { ROCKS_LOG_FATAL(info_log_, "current_user_key_snapshot_ (%" PRIu64 - ") != last_snapshot (%" PRIu64 ")", + ") < last_snapshot (%" PRIu64 ")", current_user_key_snapshot_, last_snapshot); } + if (ikey_.type == kTypeBlobIndex) { + ikey_.type = kTypeValue; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + } + value_.clear(); valid_ = true; clear_and_output_next_key_ = false; @@ -372,6 +538,25 @@ // 2) We've already returned a record in this snapshot -OR- // there are no earlier earliest_write_conflict_snapshot. // + // A note about 2) above: + // we try to determine whether there is any earlier write conflict + // checking snapshot by calling DefinitelyInSnapshot() with seq and + // earliest_write_conflict_snapshot as arguments. For write-prepared + // and write-unprepared transactions, if earliest_write_conflict_snapshot + // is evicted from WritePreparedTxnDB::commit_cache, then + // DefinitelyInSnapshot(seq, earliest_write_conflict_snapshot) returns + // false, even if the seq is actually visible within + // earliest_write_conflict_snapshot. Consequently, CompactionIterator + // may try to zero out its sequence number, thus hitting assertion error + // in debug mode or cause incorrect DBIter return result. + // We observe that earliest_write_conflict_snapshot >= earliest_snapshot, + // and the seq zeroing logic depends on + // DefinitelyInSnapshot(seq, earliest_snapshot). Therefore, if we cannot + // determine whether seq is **definitely** in + // earliest_write_conflict_snapshot, then we can additionally check if + // seq is definitely in earliest_snapshot. If the latter holds, then the + // former holds too. + // // Rule 1 is needed for SingleDelete correctness. 
Rule 2 is needed to // allow Transactions to do write-conflict checking (if we compacted away // all keys, then we wouldn't know that a write happened in this @@ -396,33 +581,78 @@ // we can choose how to handle such a combinations of operations. We will // try to compact out as much as we can in these cases. // We will report counts on these anomalous cases. + // + // Note: If timestamp is enabled, then record will be eligible for + // deletion, only if, along with above conditions (Rule 1 and Rule 2) + // full_history_ts_low_ is specified and timestamp for that key is less + // than *full_history_ts_low_. If it's not eligible for deletion, then we + // will output the SingleDelete. For Optimization 3 also, if + // full_history_ts_low_ is specified and timestamp for the key is less + // than *full_history_ts_low_ then only optimization will be applied. // The easiest way to process a SingleDelete during iteration is to peek // ahead at the next key. + const bool is_timestamp_eligible_for_gc = + (timestamp_size_ == 0 || + (full_history_ts_low_ && cmp_with_history_ts_low_ < 0)); + ParsedInternalKey next_ikey; - input_->Next(); + AdvanceInputIter(); // Check whether the next key exists, is not corrupt, and is the same key // as the single delete. - if (input_->Valid() && ParseInternalKey(input_->key(), &next_ikey) && - cmp_->Equal(ikey_.user_key, next_ikey.user_key)) { - // Check whether the next key belongs to the same snapshot as the - // SingleDelete. - if (prev_snapshot == 0 || - DEFINITELY_NOT_IN_SNAPSHOT(next_ikey.sequence, prev_snapshot)) { - if (next_ikey.type == kTypeSingleDeletion) { - // We encountered two SingleDeletes in a row. This could be due to - // unexpected user input. - // Skip the first SingleDelete and let the next iteration decide how - // to handle the second SingleDelete + if (input_.Valid() && + ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_) + .ok() && + cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) { +#ifndef NDEBUG + const Compaction* c = + compaction_ ? compaction_->real_compaction() : nullptr; +#endif + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:SingleDelete:1", + const_cast(c)); + if (last_key_seq_zeroed_) { + ++iter_stats_.num_record_drop_hidden; + ++iter_stats_.num_record_drop_obsolete; + assert(bottommost_level_); + AdvanceInputIter(); + } else if (prev_snapshot == 0 || + DefinitelyNotInSnapshot(next_ikey.sequence, prev_snapshot)) { + // Check whether the next key belongs to the same snapshot as the + // SingleDelete. + + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:SingleDelete:2", nullptr); + if (next_ikey.type == kTypeSingleDeletion || + next_ikey.type == kTypeDeletion) { + // We encountered two SingleDeletes for same key in a row. This + // could be due to unexpected user input. If write-(un)prepared + // transaction is used, this could also be due to releasing an old + // snapshot between a Put and its matching SingleDelete. + // Furthermore, if write-(un)prepared transaction is rolled back + // after prepare, we will write a Delete to cancel a prior Put. If + // old snapshot is released between a later Put and its matching + // SingleDelete, we will end up with a Delete followed by + // SingleDelete. + // Skip the first SingleDelete and let the next iteration decide + // how to handle the second SingleDelete or Delete. // First SingleDelete has been skipped since we already called - // input_->Next(). + // input_.Next(). 
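The "two SingleDeletes in a row" handling above exists because SingleDelete has a narrow contract: it may only cancel one earlier Put of the same key, with no intervening overwrite, Delete, or Merge; releasing an old snapshot under write-prepared transactions can nevertheless surface such pairs. The intended public-API usage pattern (illustrative):

#include <cassert>

#include "rocksdb/db.h"

// Illustrative only: SingleDelete pairs with exactly one prior Put of the
// same key; violating this (e.g. Put, Put, SingleDelete) is undefined.
void PutThenSingleDelete(rocksdb::DB* db) {
  rocksdb::WriteOptions write_options;
  rocksdb::Status s = db->Put(write_options, "k", "v");
  assert(s.ok());
  s = db->SingleDelete(write_options, "k");
  assert(s.ok());
}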
++iter_stats_.num_record_drop_obsolete; ++iter_stats_.num_single_del_mismatch; + } else if (!is_timestamp_eligible_for_gc) { + // We cannot drop the SingleDelete as timestamp is enabled, and + // timestamp of this key is greater than or equal to + // *full_history_ts_low_. We will output the SingleDelete. + valid_ = true; } else if (has_outputted_key_ || - DEFINITELY_IN_SNAPSHOT( - ikey_.sequence, earliest_write_conflict_snapshot_)) { + DefinitelyInSnapshot(ikey_.sequence, + earliest_write_conflict_snapshot_) || + (earliest_snapshot_ < earliest_write_conflict_snapshot_ && + DefinitelyInSnapshot(ikey_.sequence, + earliest_snapshot_))) { // Found a matching value, we can drop the single delete and the // value. It is safe to drop both records since we've already // outputted a key in this snapshot, or there is no earlier @@ -439,9 +669,9 @@ ++iter_stats_.num_record_drop_hidden; ++iter_stats_.num_record_drop_obsolete; - // Already called input_->Next() once. Call it a second time to + // Already called input_.Next() once. Call it a second time to // skip past the second key. - input_->Next(); + AdvanceInputIter(); } else { // Found a matching value, but we cannot drop both keys since // there is an earlier snapshot and we need to leave behind a record @@ -455,11 +685,17 @@ // Set up the Put to be outputted in the next iteration. // (Optimization 3). clear_and_output_next_key_ = true; + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:KeepSDForWW", + /*arg=*/nullptr); } } else { // We hit the next snapshot without hitting a put, so the iterator // returns the single delete. valid_ = true; + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:SingleDelete:3", + const_cast(c)); } } else { // We are at the end of the input, could not parse the next key, or hit @@ -470,9 +706,11 @@ // iteration. If the next key is corrupt, we return before the // comparison, so the value of has_current_user_key does not matter. has_current_user_key_ = false; - if (compaction_ != nullptr && IN_EARLIEST_SNAPSHOT(ikey_.sequence) && + if (compaction_ != nullptr && + DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) && compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key, - &level_ptrs_)) { + &level_ptrs_) && + is_timestamp_eligible_for_gc) { // Key doesn't exist outside of this range. // Can compact out this SingleDelete. ++iter_stats_.num_record_drop_obsolete; @@ -480,6 +718,11 @@ if (!bottommost_level_) { ++iter_stats_.num_optimized_del_drop_obsolete; } + } else if (last_key_seq_zeroed_) { + // Skip. + ++iter_stats_.num_record_drop_hidden; + ++iter_stats_.num_record_drop_obsolete; + assert(bottommost_level_); } else { // Output SingleDelete valid_ = true; @@ -508,10 +751,13 @@ last_sequence, current_user_key_sequence_); } - ++iter_stats_.num_record_drop_hidden; // (A) - input_->Next(); - } else if (compaction_ != nullptr && ikey_.type == kTypeDeletion && - IN_EARLIEST_SNAPSHOT(ikey_.sequence) && + ++iter_stats_.num_record_drop_hidden; // rule (A) + AdvanceInputIter(); + } else if (compaction_ != nullptr && + (ikey_.type == kTypeDeletion || + (ikey_.type == kTypeDeletionWithTimestamp && + cmp_with_history_ts_low_ < 0)) && + DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) && ikeyNotNeededForIncrementalSnapshot() && compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key, &level_ptrs_)) { @@ -534,30 +780,54 @@ // given that: // (1) The deletion is earlier than earliest_write_conflict_snapshot, and // (2) No value exist earlier than the deletion. 
+ // + // Note also that a deletion marker of type kTypeDeletionWithTimestamp + // will be treated as a different user key unless the timestamp is older + // than *full_history_ts_low_. ++iter_stats_.num_record_drop_obsolete; if (!bottommost_level_) { ++iter_stats_.num_optimized_del_drop_obsolete; } - input_->Next(); - } else if ((ikey_.type == kTypeDeletion) && bottommost_level_ && - ikeyNotNeededForIncrementalSnapshot()) { + AdvanceInputIter(); + } else if ((ikey_.type == kTypeDeletion || + (ikey_.type == kTypeDeletionWithTimestamp && + cmp_with_history_ts_low_ < 0)) && + bottommost_level_ && ikeyNotNeededForIncrementalSnapshot()) { // Handle the case where we have a delete key at the bottom most level // We can skip outputting the key iff there are no subsequent puts for this // key + assert(!compaction_ || compaction_->KeyNotExistsBeyondOutputLevel( + ikey_.user_key, &level_ptrs_)); ParsedInternalKey next_ikey; - input_->Next(); - // Skip over all versions of this key that happen to occur in the same snapshot - // range as the delete - while (input_->Valid() && ParseInternalKey(input_->key(), &next_ikey) && - cmp_->Equal(ikey_.user_key, next_ikey.user_key) && + AdvanceInputIter(); +#ifndef NDEBUG + const Compaction* c = + compaction_ ? compaction_->real_compaction() : nullptr; +#endif + TEST_SYNC_POINT_CALLBACK( + "CompactionIterator::NextFromInput:BottommostDelete:1", + const_cast(c)); + // Skip over all versions of this key that happen to occur in the same + // snapshot range as the delete. + // + // Note that a deletion marker of type kTypeDeletionWithTimestamp will be + // considered to have a different user key unless the timestamp is older + // than *full_history_ts_low_. + while (!IsPausingManualCompaction() && !IsShuttingDown() && + input_.Valid() && + (ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_) + .ok()) && + cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key) && (prev_snapshot == 0 || - DEFINITELY_NOT_IN_SNAPSHOT(next_ikey.sequence, prev_snapshot))) { - input_->Next(); + DefinitelyNotInSnapshot(next_ikey.sequence, prev_snapshot))) { + AdvanceInputIter(); } // If you find you still need to output a row with this key, we need to output the // delete too - if (input_->Valid() && ParseInternalKey(input_->key(), &next_ikey) && - cmp_->Equal(ikey_.user_key, next_ikey.user_key)) { + if (input_.Valid() && + (ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_) + .ok()) && + cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) { valid_ = true; at_next_ = true; } @@ -569,12 +839,15 @@ } pinned_iters_mgr_.StartPinning(); + // We know the merge type entry is not hidden, otherwise we would // have hit (A) // We encapsulate the merge related state machine in a different // object to minimize change to the existing flow. - Status s = merge_helper_->MergeUntil(input_, range_del_agg_, - prev_snapshot, bottommost_level_); + Status s = merge_helper_->MergeUntil( + &input_, range_del_agg_, prev_snapshot, bottommost_level_, + allow_data_in_errors_, blob_fetcher_.get(), prefetch_buffers_.get(), + &iter_stats_); merge_out_iter_.SeekToFirst(); if (!s.ok() && !s.IsMergeInProgress()) { @@ -585,14 +858,13 @@ // These will be correctly set below. 
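MergeUntil() collapses a run of kTypeMerge entries for one user key, now with access to the blob fetcher and prefetch buffers so merge operands can reference blobs. The semantics of the collapse are supplied by the user's MergeOperator; a minimal associative example against the public API (class name and separator are illustrative):

#include <string>

#include "rocksdb/merge_operator.h"
#include "rocksdb/slice.h"

// Illustrative only: an associative merge that concatenates operands with
// ','; compaction can fold any run of such operands into one value.
class AppendMerge : public rocksdb::AssociativeMergeOperator {
 public:
  bool Merge(const rocksdb::Slice& /*key*/, const rocksdb::Slice* existing,
             const rocksdb::Slice& value, std::string* new_value,
             rocksdb::Logger* /*logger*/) const override {
    new_value->clear();
    if (existing != nullptr) {
      new_value->assign(existing->data(), existing->size());
      new_value->push_back(',');
    }
    new_value->append(value.data(), value.size());
    return true;
  }
  const char* Name() const override { return "AppendMerge"; }
};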
key_ = merge_out_iter_.key(); value_ = merge_out_iter_.value(); - bool valid_key __attribute__((__unused__)); - valid_key = ParseInternalKey(key_, &ikey_); + pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_); // MergeUntil stops when it encounters a corrupt key and does not // include them in the result, so we expect the keys here to valid. - assert(valid_key); - if (!valid_key) { - ROCKS_LOG_FATAL(info_log_, "Invalid key (%s) in compaction", - key_.ToString(true).c_str()); + assert(pik_status.ok()); + if (!pik_status.ok()) { + ROCKS_LOG_FATAL(info_log_, "Invalid key in compaction. %s", + pik_status.getState()); } // Keep current_key_ in sync. current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); @@ -618,14 +890,14 @@ if (should_delete) { ++iter_stats_.num_record_drop_hidden; ++iter_stats_.num_record_drop_range_del; - input_->Next(); + AdvanceInputIter(); } else { valid_ = true; } } if (need_skip) { - input_->Seek(skip_until); + SkipUntil(skip_until); } } @@ -638,25 +910,144 @@ } } -void CompactionIterator::PrepareOutput() { - if (valid_) { - if (compaction_filter_ && ikey_.type == kTypeBlobIndex) { - const auto blob_decision = compaction_filter_->PrepareBlobOutput( - user_key(), value_, &compaction_filter_value_); - - if (blob_decision == CompactionFilter::BlobDecision::kCorruption) { - status_ = Status::Corruption( - "Corrupted blob reference encountered during GC"); +bool CompactionIterator::ExtractLargeValueIfNeededImpl() { + if (!blob_file_builder_) { + return false; + } + + blob_index_.clear(); + const Status s = blob_file_builder_->Add(user_key(), value_, &blob_index_); + + if (!s.ok()) { + status_ = s; + valid_ = false; + + return false; + } + + if (blob_index_.empty()) { + return false; + } + + value_ = blob_index_; + + return true; +} + +void CompactionIterator::ExtractLargeValueIfNeeded() { + assert(ikey_.type == kTypeValue); + + if (!ExtractLargeValueIfNeededImpl()) { + return; + } + + ikey_.type = kTypeBlobIndex; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); +} + +void CompactionIterator::GarbageCollectBlobIfNeeded() { + assert(ikey_.type == kTypeBlobIndex); + + if (!compaction_) { + return; + } + + // GC for integrated BlobDB + if (compaction_->enable_blob_garbage_collection()) { + BlobIndex blob_index; + + { + const Status s = blob_index.DecodeFrom(value_); + + if (!s.ok()) { + status_ = s; valid_ = false; - } else if (blob_decision == CompactionFilter::BlobDecision::kIOError) { - status_ = Status::IOError("Could not relocate blob during GC"); + + return; + } + } + + if (blob_index.file_number() >= + blob_garbage_collection_cutoff_file_number_) { + return; + } + + FilePrefetchBuffer* prefetch_buffer = + prefetch_buffers_ ? 
prefetch_buffers_->GetOrCreatePrefetchBuffer( + blob_index.file_number()) + : nullptr; + + uint64_t bytes_read = 0; + + { + assert(blob_fetcher_); + + const Status s = blob_fetcher_->FetchBlob( + user_key(), blob_index, prefetch_buffer, &blob_value_, &bytes_read); + + if (!s.ok()) { + status_ = s; valid_ = false; - } else if (blob_decision == - CompactionFilter::BlobDecision::kChangeValue) { - value_ = compaction_filter_value_; + + return; } } + ++iter_stats_.num_blobs_read; + iter_stats_.total_blob_bytes_read += bytes_read; + + ++iter_stats_.num_blobs_relocated; + iter_stats_.total_blob_bytes_relocated += blob_index.size(); + + value_ = blob_value_; + + if (ExtractLargeValueIfNeededImpl()) { + return; + } + + ikey_.type = kTypeValue; + current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type); + + return; + } + + // GC for stacked BlobDB + if (compaction_filter_ && + compaction_filter_->IsStackedBlobDbInternalCompactionFilter()) { + const auto blob_decision = compaction_filter_->PrepareBlobOutput( + user_key(), value_, &compaction_filter_value_); + + if (blob_decision == CompactionFilter::BlobDecision::kCorruption) { + status_ = + Status::Corruption("Corrupted blob reference encountered during GC"); + valid_ = false; + + return; + } + + if (blob_decision == CompactionFilter::BlobDecision::kIOError) { + status_ = Status::IOError("Could not relocate blob during GC"); + valid_ = false; + + return; + } + + if (blob_decision == CompactionFilter::BlobDecision::kChangeValue) { + value_ = compaction_filter_value_; + + return; + } + } +} + +void CompactionIterator::PrepareOutput() { + if (valid_) { + if (ikey_.type == kTypeValue) { + ExtractLargeValueIfNeeded(); + } else if (ikey_.type == kTypeBlobIndex) { + GarbageCollectBlobIfNeeded(); + } + // Zeroing out the sequence number leads to better compression. // If this is the bottommost level (no files in lower levels) // and the earliest snapshot is larger than this seqno @@ -671,15 +1062,34 @@ if (valid_ && compaction_ != nullptr && !compaction_->allow_ingest_behind() && ikeyNotNeededForIncrementalSnapshot() && bottommost_level_ && - IN_EARLIEST_SNAPSHOT(ikey_.sequence) && ikey_.type != kTypeMerge) { - assert(ikey_.type != kTypeDeletion && ikey_.type != kTypeSingleDeletion); - if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion) { + DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) && + ikey_.type != kTypeMerge) { + assert(ikey_.type != kTypeDeletion); + assert(ikey_.type != kTypeSingleDeletion || + (timestamp_size_ || full_history_ts_low_)); + if (ikey_.type == kTypeDeletion || + (ikey_.type == kTypeSingleDeletion && + (!timestamp_size_ || !full_history_ts_low_))) { ROCKS_LOG_FATAL(info_log_, "Unexpected key type %d for seq-zero optimization", ikey_.type); } ikey_.sequence = 0; - current_key_.UpdateInternalKey(0, ikey_.type); + last_key_seq_zeroed_ = true; + TEST_SYNC_POINT_CALLBACK("CompactionIterator::PrepareOutput:ZeroingSeq", + &ikey_); + if (!timestamp_size_) { + current_key_.UpdateInternalKey(0, ikey_.type); + } else if (full_history_ts_low_ && cmp_with_history_ts_low_ < 0) { + // We can also zero out timestamp for better compression. + // For the same user key (excluding timestamp), the timestamp-based + // history can be collapsed to save some space if the timestamp is + // older than *full_history_ts_low_. 
+ const std::string kTsMin(timestamp_size_, static_cast(0)); + const Slice ts_slice = kTsMin; + ikey_.SetTimestamp(ts_slice); + current_key_.UpdateInternalKey(0, ikey_.type, &ts_slice); + } } } } @@ -736,39 +1146,68 @@ (ikey_.sequence < preserve_deletes_seqnum_); } -bool CompactionIterator::IsInEarliestSnapshot(SequenceNumber sequence) { - assert(snapshot_checker_ != nullptr); - bool pre_condition = (earliest_snapshot_ == kMaxSequenceNumber || - (earliest_snapshot_iter_ != snapshots_->end() && - *earliest_snapshot_iter_ == earliest_snapshot_)); - assert(pre_condition); - if (!pre_condition) { - ROCKS_LOG_FATAL(info_log_, - "Pre-Condition is not hold in IsInEarliestSnapshot"); +uint64_t CompactionIterator::ComputeBlobGarbageCollectionCutoffFileNumber( + const CompactionProxy* compaction) { + if (!compaction) { + return 0; } - auto in_snapshot = - snapshot_checker_->CheckInSnapshot(sequence, earliest_snapshot_); - while (UNLIKELY(in_snapshot == SnapshotCheckerResult::kSnapshotReleased)) { - // Avoid the the current earliest_snapshot_ being return as - // earliest visible snapshot for the next value. So if a value's sequence - // is zero-ed out by PrepareOutput(), the next value will be compact out. - released_snapshots_.insert(earliest_snapshot_); - earliest_snapshot_iter_++; - if (earliest_snapshot_iter_ == snapshots_->end()) { - earliest_snapshot_ = kMaxSequenceNumber; - } else { - earliest_snapshot_ = *earliest_snapshot_iter_; - } - in_snapshot = - snapshot_checker_->CheckInSnapshot(sequence, earliest_snapshot_); + if (!compaction->enable_blob_garbage_collection()) { + return 0; } - assert(in_snapshot != SnapshotCheckerResult::kSnapshotReleased); - if (in_snapshot == SnapshotCheckerResult::kSnapshotReleased) { - ROCKS_LOG_FATAL(info_log_, - "Unexpected released snapshot in IsInEarliestSnapshot"); + + const Version* const version = compaction->input_version(); + assert(version); + + const VersionStorageInfo* const storage_info = version->storage_info(); + assert(storage_info); + + const auto& blob_files = storage_info->GetBlobFiles(); + + auto it = blob_files.begin(); + std::advance( + it, compaction->blob_garbage_collection_age_cutoff() * blob_files.size()); + + return it != blob_files.end() ? 
it->first + : std::numeric_limits::max(); +} + +std::unique_ptr CompactionIterator::CreateBlobFetcherIfNeeded( + const CompactionProxy* compaction) { + if (!compaction) { + return nullptr; + } + + const Version* const version = compaction->input_version(); + if (!version) { + return nullptr; } - return in_snapshot == SnapshotCheckerResult::kInSnapshot; + + return std::unique_ptr(new BlobFetcher(version, ReadOptions())); +} + +std::unique_ptr +CompactionIterator::CreatePrefetchBufferCollectionIfNeeded( + const CompactionProxy* compaction) { + if (!compaction) { + return nullptr; + } + + if (!compaction->input_version()) { + return nullptr; + } + + if (compaction->allow_mmap_reads()) { + return nullptr; + } + + const uint64_t readahead_size = compaction->blob_compaction_readahead_size(); + if (!readahead_size) { + return nullptr; + } + + return std::unique_ptr( + new PrefetchBufferCollection(readahead_size)); } } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator.h 2025-05-19 16:14:27.000000000 +0000 @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -21,39 +22,153 @@ namespace ROCKSDB_NAMESPACE { +class BlobFileBuilder; +class BlobFetcher; +class PrefetchBufferCollection; + +// A wrapper of internal iterator whose purpose is to count how +// many entries there are in the iterator. +class SequenceIterWrapper : public InternalIterator { + public: + SequenceIterWrapper(InternalIterator* iter, const Comparator* cmp, + bool need_count_entries) + : icmp_(cmp, /*named=*/false), + inner_iter_(iter), + need_count_entries_(need_count_entries) {} + bool Valid() const override { return inner_iter_->Valid(); } + Status status() const override { return inner_iter_->status(); } + void Next() override { + num_itered_++; + inner_iter_->Next(); + } + void Seek(const Slice& target) override { + if (!need_count_entries_) { + inner_iter_->Seek(target); + } else { + // For flush cases, we need to count total number of entries, so we + // do Next() rather than Seek(). + while (inner_iter_->Valid() && + icmp_.Compare(inner_iter_->key(), target) < 0) { + Next(); + } + } + } + Slice key() const override { return inner_iter_->key(); } + Slice value() const override { return inner_iter_->value(); } + + // Unused InternalIterator methods + void SeekToFirst() override { assert(false); } + void Prev() override { assert(false); } + void SeekForPrev(const Slice& /* target */) override { assert(false); } + void SeekToLast() override { assert(false); } + + uint64_t num_itered() const { return num_itered_; } + + private: + InternalKeyComparator icmp_; + InternalIterator* inner_iter_; // not owned + uint64_t num_itered_ = 0; + bool need_count_entries_; +}; + class CompactionIterator { public: // A wrapper around Compaction. Has a much smaller interface, only what // CompactionIterator uses. Tests can override it. 
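ComputeBlobGarbageCollectionCutoffFileNumber() above turns the configured age cutoff fraction into a concrete blob file number by advancing through the file-number-ordered collection. A self-contained sketch of that arithmetic, with a plain std::map standing in for the version's blob file metadata:

    #include <cstdint>
    #include <iostream>
    #include <iterator>
    #include <limits>
    #include <map>

    // Blob files are ordered by file number; advancing age_cutoff * N entries
    // from the oldest yields the first file that is not eligible for GC.
    uint64_t CutoffFileNumber(const std::map<uint64_t, int>& blob_files,
                              double age_cutoff) {
      auto it = blob_files.begin();
      std::advance(it, static_cast<long>(age_cutoff * blob_files.size()));
      return it != blob_files.end() ? it->first
                                    : std::numeric_limits<uint64_t>::max();
    }

    int main() {
      const std::map<uint64_t, int> files = {{10, 0}, {11, 0}, {12, 0}, {13, 0}};
      // With a 0.5 cutoff, files 10 and 11 (the oldest half) are GC
      // candidates, so the cutoff is the next file number.
      std::cout << CutoffFileNumber(files, 0.5) << "\n";  // prints 12
      return 0;
    }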
class CompactionProxy { public: - explicit CompactionProxy(const Compaction* compaction) - : compaction_(compaction) {} - virtual ~CompactionProxy() = default; - virtual int level(size_t /*compaction_input_level*/ = 0) const { - return compaction_->level(); - } + + virtual int level() const = 0; + virtual bool KeyNotExistsBeyondOutputLevel( - const Slice& user_key, std::vector* level_ptrs) const { + const Slice& user_key, std::vector* level_ptrs) const = 0; + + virtual bool bottommost_level() const = 0; + + virtual int number_levels() const = 0; + + virtual Slice GetLargestUserKey() const = 0; + + virtual bool allow_ingest_behind() const = 0; + + virtual bool preserve_deletes() const = 0; + + virtual bool allow_mmap_reads() const = 0; + + virtual bool enable_blob_garbage_collection() const = 0; + + virtual double blob_garbage_collection_age_cutoff() const = 0; + + virtual uint64_t blob_compaction_readahead_size() const = 0; + + virtual const Version* input_version() const = 0; + + virtual bool DoesInputReferenceBlobFiles() const = 0; + + virtual const Compaction* real_compaction() const = 0; + }; + + class RealCompaction : public CompactionProxy { + public: + explicit RealCompaction(const Compaction* compaction) + : compaction_(compaction) { + assert(compaction_); + assert(compaction_->immutable_options()); + assert(compaction_->mutable_cf_options()); + } + + int level() const override { return compaction_->level(); } + + bool KeyNotExistsBeyondOutputLevel( + const Slice& user_key, std::vector* level_ptrs) const override { return compaction_->KeyNotExistsBeyondOutputLevel(user_key, level_ptrs); } - virtual bool bottommost_level() const { + + bool bottommost_level() const override { return compaction_->bottommost_level(); } - virtual int number_levels() const { return compaction_->number_levels(); } - virtual Slice GetLargestUserKey() const { + + int number_levels() const override { return compaction_->number_levels(); } + + Slice GetLargestUserKey() const override { return compaction_->GetLargestUserKey(); } - virtual bool allow_ingest_behind() const { - return compaction_->immutable_cf_options()->allow_ingest_behind; + + bool allow_ingest_behind() const override { + return compaction_->immutable_options()->allow_ingest_behind; + } + + bool preserve_deletes() const override { + return compaction_->immutable_options()->preserve_deletes; + } + + bool allow_mmap_reads() const override { + return compaction_->immutable_options()->allow_mmap_reads; + } + + bool enable_blob_garbage_collection() const override { + return compaction_->mutable_cf_options()->enable_blob_garbage_collection; } - virtual bool preserve_deletes() const { - return compaction_->immutable_cf_options()->preserve_deletes; + + double blob_garbage_collection_age_cutoff() const override { + return compaction_->mutable_cf_options() + ->blob_garbage_collection_age_cutoff; } - protected: - CompactionProxy() = default; + uint64_t blob_compaction_readahead_size() const override { + return compaction_->mutable_cf_options()->blob_compaction_readahead_size; + } + + const Version* input_version() const override { + return compaction_->input_version(); + } + + bool DoesInputReferenceBlobFiles() const override { + return compaction_->DoesInputReferenceBlobFiles(); + } + + const Compaction* real_compaction() const override { return compaction_; } private: const Compaction* compaction_; @@ -66,12 +181,15 @@ const SnapshotChecker* snapshot_checker, Env* env, bool report_detailed_time, bool expect_valid_internal_key, 
CompactionRangeDelAggregator* range_del_agg, + BlobFileBuilder* blob_file_builder, bool allow_data_in_errors, const Compaction* compaction = nullptr, const CompactionFilter* compaction_filter = nullptr, const std::atomic* shutting_down = nullptr, const SequenceNumber preserve_deletes_seqnum = 0, - const std::atomic* manual_compaction_paused = nullptr, - const std::shared_ptr info_log = nullptr); + const std::atomic* manual_compaction_paused = nullptr, + const std::atomic* manual_compaction_canceled = nullptr, + const std::shared_ptr info_log = nullptr, + const std::string* full_history_ts_low = nullptr); // Constructor with custom CompactionProxy, used for tests. CompactionIterator( @@ -81,12 +199,15 @@ const SnapshotChecker* snapshot_checker, Env* env, bool report_detailed_time, bool expect_valid_internal_key, CompactionRangeDelAggregator* range_del_agg, + BlobFileBuilder* blob_file_builder, bool allow_data_in_errors, std::unique_ptr compaction, const CompactionFilter* compaction_filter = nullptr, const std::atomic* shutting_down = nullptr, const SequenceNumber preserve_deletes_seqnum = 0, - const std::atomic* manual_compaction_paused = nullptr, - const std::shared_ptr info_log = nullptr); + const std::atomic* manual_compaction_paused = nullptr, + const std::atomic* manual_compaction_canceled = nullptr, + const std::shared_ptr info_log = nullptr, + const std::string* full_history_ts_low = nullptr); ~CompactionIterator(); @@ -110,18 +231,39 @@ bool Valid() const { return valid_; } const Slice& user_key() const { return current_user_key_; } const CompactionIterationStats& iter_stats() const { return iter_stats_; } + uint64_t num_input_entry_scanned() const { return input_.num_itered(); } private: // Processes the input stream to find the next output void NextFromInput(); - // Do last preparations before presenting the output to the callee. At this - // point this only zeroes out the sequence number if possible for better - // compression. + // Do final preparations before presenting the output to the callee. void PrepareOutput(); + // Passes the output value to the blob file builder (if any), and replaces it + // with the corresponding blob reference if it has been actually written to a + // blob file (i.e. if it passed the value size check). Returns true if the + // value got extracted to a blob file, false otherwise. + bool ExtractLargeValueIfNeededImpl(); + + // Extracts large values as described above, and updates the internal key's + // type to kTypeBlobIndex if the value got extracted. Should only be called + // for regular values (kTypeValue). + void ExtractLargeValueIfNeeded(); + + // Relocates valid blobs residing in the oldest blob files if garbage + // collection is enabled. Relocated blobs are written to new blob files or + // inlined in the LSM tree depending on the current settings (i.e. + // enable_blob_files and min_blob_size). Should only be called for blob + // references (kTypeBlobIndex). + // + // Note: the stacked BlobDB implementation's compaction filter based GC + // algorithm is also called from here. + void GarbageCollectBlobIfNeeded(); + // Invoke compaction filter if needed. - void InvokeFilterIfNeeded(bool* need_skip, Slice* skip_until); + // Return true on success, false on failures (e.g.: kIOError). + bool InvokeFilterIfNeeded(bool* need_skip, Slice* skip_until); // Given a sequence number, return the sequence number of the // earliest snapshot that this sequence number is visible in. 
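For the lookup described in the comment above, the sorted snapshot list reduces the search to a lower bound. A sketch under the simplifying assumption that no SnapshotChecker is installed (the real code additionally consults the checker and may skip released snapshots):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    constexpr uint64_t kMaxSequenceNumber = UINT64_MAX;

    // A snapshot S sees sequence `seq` iff seq <= S, so the earliest visible
    // snapshot is the first element >= seq in the ascending snapshot list.
    uint64_t EarliestVisibleSnapshot(const std::vector<uint64_t>& snapshots,
                                     uint64_t seq) {
      auto it = std::lower_bound(snapshots.begin(), snapshots.end(), seq);
      return it != snapshots.end() ? *it : kMaxSequenceNumber;
    }

    int main() {
      const std::vector<uint64_t> snapshots = {5, 10, 20};
      std::cout << EarliestVisibleSnapshot(snapshots, 7) << "\n";  // prints 10
      return 0;
    }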
@@ -143,9 +285,32 @@ SnapshotCheckerResult::kInSnapshot; } - bool IsInEarliestSnapshot(SequenceNumber sequence); + bool DefinitelyInSnapshot(SequenceNumber seq, SequenceNumber snapshot); + + bool DefinitelyNotInSnapshot(SequenceNumber seq, SequenceNumber snapshot); + + // Extract user-defined timestamp from user key if possible and compare it + // with *full_history_ts_low_ if applicable. + inline void UpdateTimestampAndCompareWithFullHistoryLow() { + if (!timestamp_size_) { + return; + } + Slice ts = ExtractTimestampFromUserKey(ikey_.user_key, timestamp_size_); + curr_ts_.assign(ts.data(), ts.size()); + if (full_history_ts_low_) { + cmp_with_history_ts_low_ = + cmp_->CompareTimestamp(ts, *full_history_ts_low_); + } + } - InternalIterator* input_; + static uint64_t ComputeBlobGarbageCollectionCutoffFileNumber( + const CompactionProxy* compaction); + static std::unique_ptr CreateBlobFetcherIfNeeded( + const CompactionProxy* compaction); + static std::unique_ptr + CreatePrefetchBufferCollectionIfNeeded(const CompactionProxy* compaction); + + SequenceIterWrapper input_; const Comparator* cmp_; MergeHelper* merge_helper_; const std::vector* snapshots_; @@ -159,13 +324,16 @@ const SequenceNumber earliest_write_conflict_snapshot_; const SnapshotChecker* const snapshot_checker_; Env* env_; + SystemClock* clock_; bool report_detailed_time_; bool expect_valid_internal_key_; CompactionRangeDelAggregator* range_del_agg_; + BlobFileBuilder* blob_file_builder_; std::unique_ptr compaction_; const CompactionFilter* compaction_filter_; const std::atomic* shutting_down_; - const std::atomic* manual_compaction_paused_; + const std::atomic* manual_compaction_paused_; + const std::atomic* manual_compaction_canceled_; const SequenceNumber preserve_deletes_seqnum_; bool bottommost_level_; bool valid_ = false; @@ -173,6 +341,20 @@ SequenceNumber earliest_snapshot_; SequenceNumber latest_snapshot_; + std::shared_ptr info_log_; + + bool allow_data_in_errors_; + + // Comes from comparator. + const size_t timestamp_size_; + + // Lower bound timestamp to retain full history in terms of user-defined + // timestamp. If a key's timestamp is older than full_history_ts_low_, then + // the key *may* be eligible for garbage collection (GC). The skipping logic + // is in `NextFromInput()` and `PrepareOutput()`. + // If nullptr, NO GC will be performed and all history will be preserved. + const std::string* const full_history_ts_low_; + // State // // Points to a copy of the current compaction iterator output (current_key_) @@ -191,11 +373,13 @@ // Stores whether ikey_.user_key is valid. If set to false, the user key is // not compared against the current key in the underlying iterator. bool has_current_user_key_ = false; - bool at_next_ = false; // If false, the iterator - // Holds a copy of the current compaction iterator output (or current key in - // the underlying iterator during NextFromInput()). + // If false, the iterator holds a copy of the current compaction iterator + // output (or current key in the underlying iterator during NextFromInput()). + bool at_next_ = false; + IterKey current_key_; Slice current_user_key_; + std::string curr_ts_; SequenceNumber current_user_key_sequence_; SequenceNumber current_user_key_snapshot_; @@ -210,6 +394,14 @@ // PinnedIteratorsManager used to pin input_ Iterator blocks while reading // merge operands and then releasing them after consuming them. 
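UpdateTimestampAndCompareWithFullHistoryLow() above slices a fixed-size timestamp off the tail of the user key and caches its comparison against *full_history_ts_low_. A standalone sketch; the big-endian encoding is chosen only so that byte-wise comparison matches numeric order (the real comparator decodes the timestamp, so this is an assumption of the model):

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Fixed-width big-endian encoding: lexicographic order == numeric order.
    std::string EncodeTs(uint64_t ts) {
      std::string out(8, '\0');
      for (int i = 7; i >= 0; --i) {
        out[i] = static_cast<char>(ts & 0xff);
        ts >>= 8;
      }
      return out;
    }

    int main() {
      const std::string key = "user_key" + EncodeTs(101);
      const std::string full_history_ts_low = EncodeTs(102);
      const std::string key_ts = key.substr(key.size() - 8);
      // A negative result (timestamp older than full_history_ts_low) marks
      // the key's older history as collapsible by GC.
      const int cmp = key_ts < full_history_ts_low
                          ? -1
                          : (key_ts > full_history_ts_low ? 1 : 0);
      std::cout << cmp << "\n";  // prints -1
      return 0;
    }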
PinnedIteratorsManager pinned_iters_mgr_; + + uint64_t blob_garbage_collection_cutoff_file_number_; + + std::unique_ptr blob_fetcher_; + std::unique_ptr prefetch_buffers_; + + std::string blob_index_; + PinnableSlice blob_value_; std::string compaction_filter_value_; InternalKey compaction_filter_skip_until_; // "level_ptrs" holds indices that remember which file of an associated @@ -224,7 +416,19 @@ // Used to avoid purging uncommitted values. The application can specify // uncommitted values by providing a SnapshotChecker object. bool current_key_committed_; - std::shared_ptr info_log_; + + // Saved result of ucmp->CompareTimestamp(current_ts_, *full_history_ts_low_) + int cmp_with_history_ts_low_; + + const int level_; + + // True if the previous internal key (same user key)'s sequence number has + // just been zeroed out during bottommost compaction. + bool last_key_seq_zeroed_{false}; + + void AdvanceInputIter() { input_.Next(); } + + void SkipUntil(const Slice& skip_until) { input_.Seek(skip_until); } bool IsShuttingDown() { // This is a best-effort facility, so memory_order_relaxed is sufficient. @@ -233,8 +437,27 @@ bool IsPausingManualCompaction() { // This is a best-effort facility, so memory_order_relaxed is sufficient. - return manual_compaction_paused_ && - manual_compaction_paused_->load(std::memory_order_relaxed); + return (manual_compaction_paused_ && + manual_compaction_paused_->load(std::memory_order_relaxed) > 0) || + (manual_compaction_canceled_ && + manual_compaction_canceled_->load(std::memory_order_relaxed)); } }; + +inline bool CompactionIterator::DefinitelyInSnapshot(SequenceNumber seq, + SequenceNumber snapshot) { + return ((seq) <= (snapshot) && + (snapshot_checker_ == nullptr || + LIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == + SnapshotCheckerResult::kInSnapshot))); +} + +inline bool CompactionIterator::DefinitelyNotInSnapshot( + SequenceNumber seq, SequenceNumber snapshot) { + return ((seq) > (snapshot) || + (snapshot_checker_ != nullptr && + UNLIKELY(snapshot_checker_->CheckInSnapshot((seq), (snapshot)) == + SnapshotCheckerResult::kNotInSnapshot))); +} + } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -3,15 +3,17 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). +#include "db/compaction/compaction_iterator.h" #include #include -#include "db/compaction/compaction_iterator.h" +#include "db/dbformat.h" #include "port/port.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" +#include "util/vector_iterator.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { @@ -38,7 +40,7 @@ // Compaction filter that gets stuck when it sees a particular key, // then gets unstuck when told to. -// Always returns Decition::kRemove. +// Always returns Decision::kRemove. 
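Note that the two inline predicates defined above are deliberately not complements: when a SnapshotChecker is present, the visibility of a sequence number can be indeterminate, and callers must then take the conservative path. A small model of that three-valued behavior (the enum and stub checker are illustrative):

    #include <cassert>
    #include <cstdint>

    enum class CheckResult { kIn, kNotIn, kUnknown };

    // Stub checker that cannot resolve the query (e.g. a released snapshot).
    CheckResult Check(uint64_t /*seq*/, uint64_t /*snapshot*/) {
      return CheckResult::kUnknown;
    }

    bool DefinitelyIn(uint64_t seq, uint64_t snap) {
      return seq <= snap && Check(seq, snap) == CheckResult::kIn;
    }

    bool DefinitelyNotIn(uint64_t seq, uint64_t snap) {
      return seq > snap || Check(seq, snap) == CheckResult::kNotIn;
    }

    int main() {
      // seq 3 <= snapshot 5, yet the checker cannot confirm visibility, so
      // both predicates return false and the caller must stay conservative.
      assert(!DefinitelyIn(3, 5) && !DefinitelyNotIn(3, 5));
      return 0;
    }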
class StallingFilter : public CompactionFilter { public: Decision FilterV2(int /*level*/, const Slice& key, ValueType /*type*/, @@ -86,7 +88,7 @@ const char* Name() const override { return "AllKeysCompactionFilter"; } }; -class LoggingForwardVectorIterator : public InternalIterator { +class LoggingForwardVectorIterator : public VectorIterator { public: struct Action { enum class Type { @@ -108,22 +110,19 @@ LoggingForwardVectorIterator(const std::vector& keys, const std::vector& values) - : keys_(keys), values_(values), current_(keys.size()) { - assert(keys_.size() == values_.size()); + : VectorIterator(keys, values) { + current_ = keys_.size(); } - bool Valid() const override { return current_ < keys_.size(); } - void SeekToFirst() override { log.emplace_back(Action::Type::SEEK_TO_FIRST); - current_ = 0; + VectorIterator::SeekToFirst(); } void SeekToLast() override { assert(false); } void Seek(const Slice& target) override { log.emplace_back(Action::Type::SEEK, target.ToString()); - current_ = std::lower_bound(keys_.begin(), keys_.end(), target.ToString()) - - keys_.begin(); + VectorIterator::Seek(target); } void SeekForPrev(const Slice& /*target*/) override { assert(false); } @@ -131,54 +130,66 @@ void Next() override { assert(Valid()); log.emplace_back(Action::Type::NEXT); - current_++; + VectorIterator::Next(); } void Prev() override { assert(false); } Slice key() const override { assert(Valid()); - return Slice(keys_[current_]); + return VectorIterator::key(); } Slice value() const override { assert(Valid()); - return Slice(values_[current_]); + return VectorIterator::value(); } - Status status() const override { return Status::OK(); } - std::vector log; - - private: - std::vector keys_; - std::vector values_; - size_t current_; }; class FakeCompaction : public CompactionIterator::CompactionProxy { public: - FakeCompaction() = default; + int level() const override { return 0; } - int level(size_t /*compaction_input_level*/) const override { return 0; } bool KeyNotExistsBeyondOutputLevel( const Slice& /*user_key*/, std::vector* /*level_ptrs*/) const override { return is_bottommost_level || key_not_exists_beyond_output_level; } + bool bottommost_level() const override { return is_bottommost_level; } + int number_levels() const override { return 1; } + Slice GetLargestUserKey() const override { return "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; } - bool allow_ingest_behind() const override { return false; } + + bool allow_ingest_behind() const override { return is_allow_ingest_behind; } bool preserve_deletes() const override { return false; } + bool allow_mmap_reads() const override { return false; } + + bool enable_blob_garbage_collection() const override { return false; } + + double blob_garbage_collection_age_cutoff() const override { return 0.0; } + + uint64_t blob_compaction_readahead_size() const override { return 0; } + + const Version* input_version() const override { return nullptr; } + + bool DoesInputReferenceBlobFiles() const override { return false; } + + const Compaction* real_compaction() const override { return nullptr; } + bool key_not_exists_beyond_output_level = false; bool is_bottommost_level = false; + + bool is_allow_ingest_behind = false; }; -// A simplifed snapshot checker which assumes each snapshot has a global +// A simplified snapshot checker which assumes each snapshot has a global // last visible sequence. 
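The test checker described in the comment above maps each snapshot to a global last-visible sequence number, which the tests populate via AddSnapshot(snapshot, last_visible). A compact model of that behavior (the fallback for unmapped snapshots is a simplification):

    #include <cassert>
    #include <cstdint>
    #include <map>

    struct SimpleSnapshotChecker {
      std::map<uint64_t, uint64_t> snapshot_map;  // snapshot -> last visible seq

      bool InSnapshot(uint64_t seq, uint64_t snapshot) const {
        const auto it = snapshot_map.find(snapshot);
        const uint64_t last_visible =
            it == snapshot_map.end() ? snapshot : it->second;
        return seq <= last_visible;
      }
    };

    int main() {
      SimpleSnapshotChecker checker;
      checker.snapshot_map[2] = 1;  // like AddSnapshot(2, 1) in the tests
      assert(checker.InSnapshot(1, 2));
      assert(!checker.InSnapshot(2, 2));  // seq 2 not yet visible at snapshot 2
      return 0;
    }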
class TestSnapshotChecker : public SnapshotChecker { public: @@ -214,6 +225,9 @@ CompactionIteratorTest() : cmp_(BytewiseComparator()), icmp_(cmp_), snapshots_({}) {} + explicit CompactionIteratorTest(const Comparator* ucmp) + : cmp_(ucmp), icmp_(cmp_), snapshots_({}) {} + void InitIterators( const std::vector& ks, const std::vector& vs, const std::vector& range_del_ks, @@ -222,9 +236,11 @@ SequenceNumber last_committed_sequence = kMaxSequenceNumber, MergeOperator* merge_op = nullptr, CompactionFilter* filter = nullptr, bool bottommost_level = false, - SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber) { + SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber, + bool key_not_exists_beyond_output_level = false, + const std::string* full_history_ts_low = nullptr) { std::unique_ptr unfragmented_range_del_iter( - new test::VectorIterator(range_del_ks, range_del_vs)); + new VectorIterator(range_del_ks, range_del_vs, &icmp_)); auto tombstone_list = std::make_shared( std::move(unfragmented_range_del_iter), icmp_); std::unique_ptr range_del_iter( @@ -234,9 +250,12 @@ range_del_agg_->AddTombstones(std::move(range_del_iter)); std::unique_ptr compaction; - if (filter || bottommost_level) { + if (filter || bottommost_level || key_not_exists_beyond_output_level) { compaction_proxy_ = new FakeCompaction(); compaction_proxy_->is_bottommost_level = bottommost_level; + compaction_proxy_->is_allow_ingest_behind = AllowIngestBehind(); + compaction_proxy_->key_not_exists_beyond_output_level = + key_not_exists_beyond_output_level; compaction.reset(compaction_proxy_); } bool use_snapshot_checker = UseSnapshotChecker() || GetParam(); @@ -249,13 +268,23 @@ 0 /*latest_snapshot*/, snapshot_checker_.get(), 0 /*level*/, nullptr /*statistics*/, &shutting_down_)); + if (c_iter_) { + // Since iter_ is still used in ~CompactionIterator(), we call + // ~CompactionIterator() first. 
+ c_iter_.reset(); + } iter_.reset(new LoggingForwardVectorIterator(ks, vs)); iter_->SeekToFirst(); c_iter_.reset(new CompactionIterator( iter_.get(), cmp_, merge_helper_.get(), last_sequence, &snapshots_, earliest_write_conflict_snapshot, snapshot_checker_.get(), Env::Default(), false /* report_detailed_time */, false, - range_del_agg_.get(), std::move(compaction), filter, &shutting_down_)); + range_del_agg_.get(), nullptr /* blob_file_builder */, + true /*allow_data_in_errors*/, std::move(compaction), filter, + &shutting_down_, /*preserve_deletes_seqnum=*/0, + /*manual_compaction_paused=*/nullptr, + /*manual_compaction_canceled=*/nullptr, /*info_log=*/nullptr, + full_history_ts_low)); } void AddSnapshot(SequenceNumber snapshot, @@ -266,6 +295,8 @@ virtual bool UseSnapshotChecker() const { return false; } + virtual bool AllowIngestBehind() const { return false; } + void RunTest( const std::vector& input_keys, const std::vector& input_values, @@ -275,10 +306,13 @@ MergeOperator* merge_operator = nullptr, CompactionFilter* compaction_filter = nullptr, bool bottommost_level = false, - SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber) { + SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber, + bool key_not_exists_beyond_output_level = false, + const std::string* full_history_ts_low = nullptr) { InitIterators(input_keys, input_values, {}, {}, kMaxSequenceNumber, last_committed_seq, merge_operator, compaction_filter, - bottommost_level, earliest_write_conflict_snapshot); + bottommost_level, earliest_write_conflict_snapshot, + key_not_exists_beyond_output_level, full_history_ts_low); c_iter_->SeekToFirst(); for (size_t i = 0; i < expected_keys.size(); i++) { std::string info = "i = " + ToString(i); @@ -288,9 +322,15 @@ ASSERT_EQ(expected_values[i], c_iter_->value().ToString()) << info; c_iter_->Next(); } + ASSERT_OK(c_iter_->status()); ASSERT_FALSE(c_iter_->Valid()); } + void ClearSnapshots() { + snapshots_.clear(); + snapshot_map_.clear(); + } + const Comparator* cmp_; const InternalKeyComparator icmp_; std::vector snapshots_; @@ -312,6 +352,7 @@ test::KeyStr("a", 3, kTypeValue)}, {"", "val"}, {}, {}, 5); c_iter_->SeekToFirst(); + ASSERT_OK(c_iter_->status()); ASSERT_FALSE(c_iter_->Valid()); } @@ -333,6 +374,7 @@ ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("b", 10, kTypeValue), c_iter_->key().ToString()); c_iter_->Next(); + ASSERT_OK(c_iter_->status()); ASSERT_FALSE(c_iter_->Valid()); } @@ -349,6 +391,7 @@ ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("night", 3, kTypeValue), c_iter_->key().ToString()); c_iter_->Next(); + ASSERT_OK(c_iter_->status()); ASSERT_FALSE(c_iter_->Valid()); } @@ -370,6 +413,7 @@ ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ(test::KeyStr("night", 40, kTypeValue), c_iter_->key().ToString()); c_iter_->Next(); + ASSERT_OK(c_iter_->status()); ASSERT_FALSE(c_iter_->Valid()); } @@ -463,6 +507,7 @@ ASSERT_EQ(test::KeyStr("h", 91, kTypeValue), c_iter_->key().ToString()); ASSERT_EQ("hv91", c_iter_->value().ToString()); c_iter_->Next(); + ASSERT_OK(c_iter_->status()); ASSERT_FALSE(c_iter_->Valid()); // Check that the compaction iterator did the correct sequence of calls on @@ -656,6 +701,7 @@ ASSERT_TRUE(c_iter_->Valid()); ASSERT_EQ("bv1bv2", c_iter_->value().ToString()); c_iter_->Next(); + ASSERT_OK(c_iter_->status()); ASSERT_EQ("cv1cv2", c_iter_->value().ToString()); } @@ -666,7 +712,7 @@ RunTest({test::KeyStr("a", 1, kTypeValue), test::KeyStr("b", 2, kTypeValue)}, {"v1", "v2"}, {test::KeyStr("a", 0, kTypeValue), 
test::KeyStr("b", 2, kTypeValue)}, - {"v1", "v2"}, kMaxSequenceNumber /*last_commited_seq*/, + {"v1", "v2"}, kMaxSequenceNumber /*last_committed_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } @@ -675,15 +721,14 @@ // permanently. TEST_P(CompactionIteratorTest, RemoveDeletionAtBottomLevel) { AddSnapshot(1); - RunTest({test::KeyStr("a", 1, kTypeDeletion), - test::KeyStr("b", 3, kTypeDeletion), - test::KeyStr("b", 1, kTypeValue)}, - {"", "", ""}, - {test::KeyStr("b", 3, kTypeDeletion), - test::KeyStr("b", 0, kTypeValue)}, - {"", ""}, - kMaxSequenceNumber /*last_commited_seq*/, nullptr /*merge_operator*/, - nullptr /*compaction_filter*/, true /*bottommost_level*/); + RunTest( + {test::KeyStr("a", 1, kTypeDeletion), test::KeyStr("b", 3, kTypeDeletion), + test::KeyStr("b", 1, kTypeValue)}, + {"", "", ""}, + {test::KeyStr("b", 3, kTypeDeletion), test::KeyStr("b", 0, kTypeValue)}, + {"", ""}, kMaxSequenceNumber /*last_committed_seq*/, + nullptr /*merge_operator*/, nullptr /*compaction_filter*/, + true /*bottommost_level*/); } // In bottommost level, single deletions earlier than earliest snapshot can be @@ -693,10 +738,22 @@ RunTest({test::KeyStr("a", 1, kTypeSingleDeletion), test::KeyStr("b", 2, kTypeSingleDeletion)}, {"", ""}, {test::KeyStr("b", 2, kTypeSingleDeletion)}, {""}, - kMaxSequenceNumber /*last_commited_seq*/, nullptr /*merge_operator*/, + kMaxSequenceNumber /*last_committed_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } +TEST_P(CompactionIteratorTest, ConvertToPutAtBottom) { + std::shared_ptr merge_op = + MergeOperators::CreateStringAppendOperator(); + RunTest({test::KeyStr("a", 4, kTypeMerge), test::KeyStr("a", 3, kTypeMerge), + test::KeyStr("a", 2, kTypeMerge), test::KeyStr("b", 1, kTypeValue)}, + {"a4", "a3", "a2", "b1"}, + {test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 0, kTypeValue)}, + {"a2,a3,a4", "b1"}, kMaxSequenceNumber /*last_committed_seq*/, + merge_op.get(), nullptr /*compaction_filter*/, + true /*bottomost_level*/); +} + INSTANTIATE_TEST_CASE_P(CompactionIteratorTestInstance, CompactionIteratorTest, testing::Values(true, false)); @@ -838,7 +895,7 @@ {"v1", "v2", "v3"}, {test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 2, kTypeValue), test::KeyStr("c", 3, kTypeValue)}, - {"v1", "v2", "v3"}, kMaxSequenceNumber /*last_commited_seq*/, + {"v1", "v2", "v3"}, kMaxSequenceNumber /*last_committed_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } @@ -849,9 +906,7 @@ RunTest( {test::KeyStr("a", 1, kTypeDeletion), test::KeyStr("b", 2, kTypeDeletion), test::KeyStr("c", 3, kTypeDeletion)}, - {"", "", ""}, - {}, - {"", ""}, kMaxSequenceNumber /*last_commited_seq*/, + {"", "", ""}, {}, {"", ""}, kMaxSequenceNumber /*last_committed_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } @@ -859,15 +914,14 @@ TEST_F(CompactionIteratorWithSnapshotCheckerTest, NotRemoveDeletionIfValuePresentToEarlierSnapshot) { AddSnapshot(2,1); - RunTest( - {test::KeyStr("a", 4, kTypeDeletion), test::KeyStr("a", 1, kTypeValue), - test::KeyStr("b", 3, kTypeValue)}, - {"", "", ""}, - {test::KeyStr("a", 4, kTypeDeletion), test::KeyStr("a", 0, kTypeValue), - test::KeyStr("b", 3, kTypeValue)}, - {"", "", ""}, kMaxSequenceNumber /*last_commited_seq*/, - nullptr /*merge_operator*/, nullptr /*compaction_filter*/, - true /*bottommost_level*/); + RunTest({test::KeyStr("a", 4, kTypeDeletion), + test::KeyStr("a", 1, kTypeValue), 
test::KeyStr("b", 3, kTypeValue)}, + {"", "", ""}, + {test::KeyStr("a", 4, kTypeDeletion), + test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 3, kTypeValue)}, + {"", "", ""}, kMaxSequenceNumber /*last_committed_seq*/, + nullptr /*merge_operator*/, nullptr /*compaction_filter*/, + true /*bottommost_level*/); } TEST_F(CompactionIteratorWithSnapshotCheckerTest, @@ -879,7 +933,7 @@ {"", "", ""}, {test::KeyStr("b", 2, kTypeSingleDeletion), test::KeyStr("c", 3, kTypeSingleDeletion)}, - {"", ""}, kMaxSequenceNumber /*last_commited_seq*/, + {"", ""}, kMaxSequenceNumber /*last_committed_seq*/, nullptr /*merge_operator*/, nullptr /*compaction_filter*/, true /*bottommost_level*/); } @@ -913,9 +967,24 @@ 2 /*earliest_write_conflict_snapshot*/); } +// Same as above but with a blob index. In addition to the value getting +// trimmed, the type of the KV is changed to kTypeValue. +TEST_F(CompactionIteratorWithSnapshotCheckerTest, + KeepSingleDeletionForWriteConflictChecking_BlobIndex) { + AddSnapshot(2, 0); + RunTest({test::KeyStr("a", 2, kTypeSingleDeletion), + test::KeyStr("a", 1, kTypeBlobIndex)}, + {"", "fake_blob_index"}, + {test::KeyStr("a", 2, kTypeSingleDeletion), + test::KeyStr("a", 1, kTypeValue)}, + {"", ""}, 2 /*last_committed_seq*/, nullptr /*merge_operator*/, + nullptr /*compaction_filter*/, false /*bottommost_level*/, + 2 /*earliest_write_conflict_snapshot*/); +} + // Compaction filter should keep uncommitted key as-is, and -// * Convert the latest velue to deletion, and/or -// * if latest value is a merge, apply filter to all suequent merges. +// * Convert the latest value to deletion, and/or +// * if latest value is a merge, apply filter to all subsequent merges. TEST_F(CompactionIteratorWithSnapshotCheckerTest, CompactionFilter_Value) { std::unique_ptr compaction_filter( @@ -968,6 +1037,323 @@ compaction_filter.get()); } +// Tests how CompactionIterator work together with AllowIngestBehind. +class CompactionIteratorWithAllowIngestBehindTest + : public CompactionIteratorTest { + public: + bool AllowIngestBehind() const override { return true; } +}; + +// When allow_ingest_behind is set, compaction iterator is not targeting +// the bottommost level since there is no guarantee there won't be further +// data ingested under the compaction output in future. 
+TEST_P(CompactionIteratorWithAllowIngestBehindTest, NoConvertToPutAtBottom) { + std::shared_ptr merge_op = + MergeOperators::CreateStringAppendOperator(); + RunTest({test::KeyStr("a", 4, kTypeMerge), test::KeyStr("a", 3, kTypeMerge), + test::KeyStr("a", 2, kTypeMerge), test::KeyStr("b", 1, kTypeValue)}, + {"a4", "a3", "a2", "b1"}, + {test::KeyStr("a", 4, kTypeMerge), test::KeyStr("b", 1, kTypeValue)}, + {"a2,a3,a4", "b1"}, kMaxSequenceNumber /*last_committed_seq*/, + merge_op.get(), nullptr /*compaction_filter*/, + true /*bottomost_level*/); +} + +TEST_P(CompactionIteratorWithAllowIngestBehindTest, + MergeToPutIfEncounteredPutAtBottom) { + std::shared_ptr merge_op = + MergeOperators::CreateStringAppendOperator(); + RunTest({test::KeyStr("a", 4, kTypeMerge), test::KeyStr("a", 3, kTypeMerge), + test::KeyStr("a", 2, kTypeValue), test::KeyStr("b", 1, kTypeValue)}, + {"a4", "a3", "a2", "b1"}, + {test::KeyStr("a", 4, kTypeValue), test::KeyStr("b", 1, kTypeValue)}, + {"a2,a3,a4", "b1"}, kMaxSequenceNumber /*last_committed_seq*/, + merge_op.get(), nullptr /*compaction_filter*/, + true /*bottomost_level*/); +} + +INSTANTIATE_TEST_CASE_P(CompactionIteratorWithAllowIngestBehindTestInstance, + CompactionIteratorWithAllowIngestBehindTest, + testing::Values(true, false)); + +class CompactionIteratorTsGcTest : public CompactionIteratorTest { + public: + CompactionIteratorTsGcTest() + : CompactionIteratorTest(test::ComparatorWithU64Ts()) {} +}; + +TEST_P(CompactionIteratorTsGcTest, NoKeyEligibleForGC) { + constexpr char user_key[][2] = {{'a', '\0'}, {'b', '\0'}}; + const std::vector input_keys = { + test::KeyStr(/*ts=*/103, user_key[0], /*seq=*/4, kTypeValue), + test::KeyStr(/*ts=*/102, user_key[0], /*seq=*/3, + kTypeDeletionWithTimestamp), + test::KeyStr(/*ts=*/104, user_key[1], /*seq=*/5, kTypeValue)}; + const std::vector input_values = {"a3", "", "b2"}; + std::string full_history_ts_low; + // All keys' timestamps are newer than or equal to 102, thus none of them + // will be eligible for GC. + PutFixed64(&full_history_ts_low, 102); + const std::vector& expected_keys = input_keys; + const std::vector& expected_values = input_values; + const std::vector> params = { + {false, false}, {false, true}, {true, true}}; + for (const std::pair& param : params) { + const bool bottommost_level = param.first; + const bool key_not_exists_beyond_output_level = param.second; + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + bottommost_level, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + key_not_exists_beyond_output_level, &full_history_ts_low); + } +} + +TEST_P(CompactionIteratorTsGcTest, AllKeysOlderThanThreshold) { + constexpr char user_key[][2] = {{'a', '\0'}, {'b', '\0'}}; + const std::vector input_keys = { + test::KeyStr(/*ts=*/103, user_key[0], /*seq=*/4, + kTypeDeletionWithTimestamp), + test::KeyStr(/*ts=*/102, user_key[0], /*seq=*/3, kTypeValue), + test::KeyStr(/*ts=*/101, user_key[0], /*seq=*/2, kTypeValue), + test::KeyStr(/*ts=*/104, user_key[1], /*seq=*/5, kTypeValue)}; + const std::vector input_values = {"", "a2", "a1", "b5"}; + std::string full_history_ts_low; + PutFixed64(&full_history_ts_low, std::numeric_limits::max()); + { + // With a snapshot at seq 3, both the deletion marker and the key at 3 must + // be preserved. 
+ AddSnapshot(3); + const std::vector expected_keys = { + input_keys[0], input_keys[1], input_keys[3]}; + const std::vector expected_values = {"", "a2", "b5"}; + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/false, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/false, &full_history_ts_low); + ClearSnapshots(); + } + { + // No snapshot, the deletion marker should be preserved because the user + // key may appear beyond output level. + const std::vector expected_keys = {input_keys[0], + input_keys[3]}; + const std::vector expected_values = {"", "b5"}; + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/false, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/false, &full_history_ts_low); + } + { + // No snapshot, the deletion marker can be dropped because the user key + // does not appear in higher levels. + const std::vector expected_keys = {input_keys[3]}; + const std::vector expected_values = {"b5"}; + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/false, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/true, &full_history_ts_low); + } +} + +TEST_P(CompactionIteratorTsGcTest, NewHidesOldSameSnapshot) { + constexpr char user_key[] = "a"; + const std::vector input_keys = { + test::KeyStr(/*ts=*/103, user_key, /*seq=*/4, kTypeDeletionWithTimestamp), + test::KeyStr(/*ts=*/102, user_key, /*seq=*/3, kTypeValue), + test::KeyStr(/*ts=*/101, user_key, /*seq=*/2, kTypeValue), + test::KeyStr(/*ts=*/100, user_key, /*seq=*/1, kTypeValue)}; + const std::vector input_values = {"", "a2", "a1", "a0"}; + { + std::string full_history_ts_low; + // Keys whose timestamps larger than or equal to 102 will be preserved. + PutFixed64(&full_history_ts_low, 102); + const std::vector expected_keys = { + input_keys[0], input_keys[1], input_keys[2]}; + const std::vector expected_values = {"", input_values[1], + input_values[2]}; + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/false, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/false, &full_history_ts_low); + } +} + +TEST_P(CompactionIteratorTsGcTest, DropTombstones) { + constexpr char user_key[] = "a"; + const std::vector input_keys = { + test::KeyStr(/*ts=*/103, user_key, /*seq=*/4, kTypeDeletionWithTimestamp), + test::KeyStr(/*ts=*/102, user_key, /*seq=*/3, kTypeValue), + test::KeyStr(/*ts=*/101, user_key, /*seq=*/2, kTypeDeletionWithTimestamp), + test::KeyStr(/*ts=*/100, user_key, /*seq=*/1, kTypeValue)}; + const std::vector input_values = {"", "a2", "", "a0"}; + const std::vector expected_keys = {input_keys[0], input_keys[1]}; + const std::vector expected_values = {"", "a2"}; + + // Take a snapshot at seq 2. + AddSnapshot(2); + + { + // Non-bottommost level, but key does not exist beyond output level. 
+ std::string full_history_ts_low; + PutFixed64(&full_history_ts_low, 102); + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_sequence=*/kMaxSequenceNumber, + /*merge_op=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/false, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/true, &full_history_ts_low); + } + { + // Bottommost level + std::string full_history_ts_low; + PutFixed64(&full_history_ts_low, 102); + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/true, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/false, &full_history_ts_low); + } +} + +TEST_P(CompactionIteratorTsGcTest, RewriteTs) { + constexpr char user_key[] = "a"; + const std::vector input_keys = { + test::KeyStr(/*ts=*/103, user_key, /*seq=*/4, kTypeDeletionWithTimestamp), + test::KeyStr(/*ts=*/102, user_key, /*seq=*/3, kTypeValue), + test::KeyStr(/*ts=*/101, user_key, /*seq=*/2, kTypeDeletionWithTimestamp), + test::KeyStr(/*ts=*/100, user_key, /*seq=*/1, kTypeValue)}; + const std::vector input_values = {"", "a2", "", "a0"}; + const std::vector expected_keys = { + input_keys[0], input_keys[1], input_keys[2], + test::KeyStr(/*ts=*/0, user_key, /*seq=*/0, kTypeValue)}; + const std::vector expected_values = {"", "a2", "", "a0"}; + + AddSnapshot(1); + AddSnapshot(2); + + { + // Bottommost level and need to rewrite both ts and seq. + std::string full_history_ts_low; + PutFixed64(&full_history_ts_low, 102); + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/true, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/true, &full_history_ts_low); + } +} + +TEST_P(CompactionIteratorTsGcTest, SingleDeleteNoKeyEligibleForGC) { + constexpr char user_key[][2] = {{'a', '\0'}, {'b', '\0'}}; + const std::vector input_keys = { + test::KeyStr(/*ts=*/104, user_key[0], /*seq=*/4, kTypeSingleDeletion), + test::KeyStr(/*ts=*/103, user_key[0], /*seq=*/3, kTypeValue), + test::KeyStr(/*ts=*/102, user_key[1], /*seq=*/2, kTypeValue)}; + const std::vector input_values = {"", "a3", "b2"}; + std::string full_history_ts_low; + // All keys' timestamps are newer than or equal to 102, thus none of them + // will be eligible for GC. 
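The PutFixed64() calls used throughout these tests append the 64-bit timestamp as eight little-endian bytes. A standalone model of the encoding, enough to see what full_history_ts_low actually contains:

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Model of PutFixed64(): append `v` as 8 little-endian bytes.
    void PutFixed64(std::string* dst, uint64_t v) {
      for (int i = 0; i < 8; ++i) {
        dst->push_back(static_cast<char>(v & 0xff));
        v >>= 8;
      }
    }

    int main() {
      std::string full_history_ts_low;
      PutFixed64(&full_history_ts_low, 102);
      std::cout << full_history_ts_low.size() << "\n";                // 8
      std::cout << static_cast<int>(full_history_ts_low[0]) << "\n";  // 102
      return 0;
    }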
+  PutFixed64(&full_history_ts_low, 102);
+  const std::vector<std::string>& expected_keys = input_keys;
+  const std::vector<std::string>& expected_values = input_values;
+  const std::vector<std::pair<bool, bool>> params = {
+      {false, false}, {false, true}, {true, true}};
+  for (const std::pair<bool, bool>& param : params) {
+    const bool bottommost_level = param.first;
+    const bool key_not_exists_beyond_output_level = param.second;
+    RunTest(input_keys, input_values, expected_keys, expected_values,
+            /*last_committed_seq=*/kMaxSequenceNumber,
+            /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr,
+            bottommost_level,
+            /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber,
+            key_not_exists_beyond_output_level, &full_history_ts_low);
+  }
+}
+
+TEST_P(CompactionIteratorTsGcTest, SingleDeleteDropTombstones) {
+  constexpr char user_key[] = "a";
+  const std::vector<std::string> input_keys = {
+      test::KeyStr(/*ts=*/103, user_key, /*seq=*/4, kTypeSingleDeletion),
+      test::KeyStr(/*ts=*/102, user_key, /*seq=*/3, kTypeValue),
+      test::KeyStr(/*ts=*/101, user_key, /*seq=*/2, kTypeSingleDeletion),
+      test::KeyStr(/*ts=*/100, user_key, /*seq=*/1, kTypeValue)};
+  const std::vector<std::string> input_values = {"", "a2", "", "a0"};
+  const std::vector<std::string> expected_keys = {input_keys[0], input_keys[1]};
+  const std::vector<std::string> expected_values = {"", "a2"};
+
+  // Take a snapshot at seq 2.
+  AddSnapshot(2);
+  {
+    const std::vector<std::pair<bool, bool>> params = {
+        {false, false}, {false, true}, {true, true}};
+    for (const std::pair<bool, bool>& param : params) {
+      const bool bottommost_level = param.first;
+      const bool key_not_exists_beyond_output_level = param.second;
+      std::string full_history_ts_low;
+      PutFixed64(&full_history_ts_low, 102);
+      RunTest(input_keys, input_values, expected_keys, expected_values,
+              /*last_committed_seq=*/kMaxSequenceNumber,
+              /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr,
+              bottommost_level,
+              /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber,
+              key_not_exists_beyond_output_level, &full_history_ts_low);
+    }
+  }
+}
+
+TEST_P(CompactionIteratorTsGcTest, SingleDeleteAllKeysOlderThanThreshold) {
+  constexpr char user_key[][2] = {{'a', '\0'}, {'b', '\0'}};
+  const std::vector<std::string> input_keys = {
+      test::KeyStr(/*ts=*/103, user_key[0], /*seq=*/4, kTypeSingleDeletion),
+      test::KeyStr(/*ts=*/102, user_key[0], /*seq=*/3, kTypeValue),
+      test::KeyStr(/*ts=*/104, user_key[1], /*seq=*/5, kTypeValue)};
+  const std::vector<std::string> input_values = {"", "a2", "b5"};
+  std::string full_history_ts_low;
+  PutFixed64(&full_history_ts_low, std::numeric_limits<uint64_t>::max());
+  {
+    // With a snapshot at seq 3, both the deletion marker and the key at 3 must
+    // be preserved.
+    AddSnapshot(3);
+    const std::vector<std::string> expected_keys = {
+        input_keys[0], input_keys[1], input_keys[2]};
+    const std::vector<std::string> expected_values = {"", "a2", "b5"};
+    RunTest(input_keys, input_values, expected_keys, expected_values,
+            /*last_committed_seq=*/kMaxSequenceNumber,
+            /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr,
+            /*bottommost_level=*/false,
+            /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber,
+            /*key_not_exists_beyond_output_level=*/false, &full_history_ts_low);
+    ClearSnapshots();
+  }
+  {
+    // No snapshot.
+ const std::vector expected_keys = {input_keys[2]}; + const std::vector expected_values = {"b5"}; + RunTest(input_keys, input_values, expected_keys, expected_values, + /*last_committed_seq=*/kMaxSequenceNumber, + /*merge_operator=*/nullptr, /*compaction_filter=*/nullptr, + /*bottommost_level=*/false, + /*earliest_write_conflict_snapshot=*/kMaxSequenceNumber, + /*key_not_exists_beyond_output_level=*/false, &full_history_ts_low); + } +} + +INSTANTIATE_TEST_CASE_P(CompactionIteratorTsGcTestInstance, + CompactionIteratorTsGcTest, + testing::Values(true, false)); + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job.cc 2025-05-19 16:14:27.000000000 +0000 @@ -7,6 +7,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include "db/compaction/compaction_job.h" + #include #include #include @@ -18,8 +20,12 @@ #include #include +#include "db/blob/blob_counting_iterator.h" +#include "db/blob/blob_file_addition.h" +#include "db/blob/blob_file_builder.h" +#include "db/blob/blob_garbage_meter.h" #include "db/builder.h" -#include "db/compaction/compaction_job.h" +#include "db/compaction/clipping_iterator.h" #include "db/db_impl/db_impl.h" #include "db/db_iter.h" #include "db/dbformat.h" @@ -31,6 +37,7 @@ #include "db/memtable_list.h" #include "db/merge_context.h" #include "db/merge_helper.h" +#include "db/output_validator.h" #include "db/range_del_aggregator.h" #include "db/version_set.h" #include "file/filename.h" @@ -42,18 +49,23 @@ #include "monitoring/iostats_context_imp.h" #include "monitoring/perf_context_imp.h" #include "monitoring/thread_status_util.h" +#include "options/configurable_helper.h" +#include "options/options_helper.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/env.h" +#include "rocksdb/sst_partitioner.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" #include "rocksdb/table.h" +#include "rocksdb/utilities/options_type.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_factory.h" #include "table/merging_iterator.h" #include "table/table_builder.h" #include "test_util/sync_point.h" #include "util/coding.h" +#include "util/hash.h" #include "util/mutexlock.h" #include "util/random.h" #include "util/stop_watch.h" @@ -95,6 +107,10 @@ return "ExternalSstIngestion"; case CompactionReason::kPeriodicCompaction: return "PeriodicCompaction"; + case CompactionReason::kChangeTemperature: + return "ChangeTemperature"; + case CompactionReason::kForcedBlobGC: + return "ForcedBlobGC"; case CompactionReason::kNumOfReasons: // fall through default: @@ -116,23 +132,37 @@ // The return status of this subcompaction Status status; + // The return IO Status of this subcompaction + IOStatus io_status; + // Files produced by this subcompaction struct Output { + Output(FileMetaData&& _meta, const InternalKeyComparator& _icmp, + bool _enable_order_check, bool _enable_hash, bool _finished = false, + uint64_t precalculated_hash = 0) + : meta(std::move(_meta)), + validator(_icmp, _enable_order_check, _enable_hash, + precalculated_hash), + finished(_finished) {} FileMetaData meta; + 
OutputValidator validator; bool finished; std::shared_ptr table_properties; }; // State kept for output being generated std::vector outputs; + std::vector blob_file_additions; + std::unique_ptr blob_garbage_meter; std::unique_ptr outfile; std::unique_ptr builder; + Output* current_output() { if (outputs.empty()) { - // This subcompaction's outptut could be empty if compaction was aborted + // This subcompaction's output could be empty if compaction was aborted // before this subcompaction had a chance to generate any output files. // When subcompactions are executed sequentially this is more likely and - // will be particulalry likely for the later subcompactions to be empty. + // will be particularly likely for the later subcompactions to be empty. // Once they are run in parallel however it should be much rarer. return nullptr; } else { @@ -140,13 +170,20 @@ } } - uint64_t current_output_file_size; + // Some identified files with old oldest ancester time and the range should be + // isolated out so that the output file(s) in that range can be merged down + // for TTL and clear the timestamps for the range. + std::vector files_to_cut_for_ttl; + int cur_files_to_cut_for_ttl = -1; + int next_files_to_cut_for_ttl = 0; + + uint64_t current_output_file_size = 0; // State during the subcompaction - uint64_t total_bytes; - uint64_t num_output_records; + uint64_t total_bytes = 0; + uint64_t num_output_records = 0; CompactionJobStats compaction_job_stats; - uint64_t approx_size; + uint64_t approx_size = 0; // An index that used to speed up ShouldStopBefore(). size_t grandparent_index = 0; // The number of bytes overlapping between the current output and @@ -154,49 +191,35 @@ uint64_t overlapped_bytes = 0; // A flag determine whether the key has been seen in ShouldStopBefore() bool seen_key = false; + // sub compaction job id, which is used to identify different sub-compaction + // within the same compaction job. 
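AddToBuilder() above routes every key/value pair through the output validator before it reaches the table builder, so a malformed output fails the subcompaction instead of producing a bad SST. A reduced model with only the ordering check (the real OutputValidator can also hash the output; this is not the RocksDB API):

    #include <iostream>
    #include <string>
    #include <vector>

    struct OrderValidator {
      std::string last_key;

      // Accepts keys only in non-decreasing order, mirroring the shape of
      // the validate-then-add pattern.
      bool Add(const std::string& key) {
        if (!last_key.empty() && key < last_key) return false;
        last_key = key;
        return true;
      }
    };

    int main() {
      OrderValidator validator;
      const std::vector<std::string> keys = {"a", "b", "a"};
      for (const std::string& key : keys) {
        if (!validator.Add(key)) {
          std::cout << "out-of-order key rejected: " << key << "\n";
          return 1;
        }
      }
      return 0;
    }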
+ // Sub-compaction job id, used to identify different sub-compactions + // within the same compaction job. + const uint32_t sub_job_id; - SubcompactionState(Compaction* c, Slice* _start, Slice* _end, - uint64_t size = 0) + SubcompactionState(Compaction* c, Slice* _start, Slice* _end, uint64_t size, + uint32_t _sub_job_id) : compaction(c), start(_start), end(_end), - outfile(nullptr), - builder(nullptr), - current_output_file_size(0), - total_bytes(0), - num_output_records(0), approx_size(size), - grandparent_index(0), - overlapped_bytes(0), - seen_key(false) { + sub_job_id(_sub_job_id) { assert(compaction != nullptr); } - SubcompactionState(SubcompactionState&& o) { *this = std::move(o); } - - SubcompactionState& operator=(SubcompactionState&& o) { - compaction = std::move(o.compaction); - start = std::move(o.start); - end = std::move(o.end); - status = std::move(o.status); - outputs = std::move(o.outputs); - outfile = std::move(o.outfile); - builder = std::move(o.builder); - current_output_file_size = std::move(o.current_output_file_size); - total_bytes = std::move(o.total_bytes); - num_output_records = std::move(o.num_output_records); - compaction_job_stats = std::move(o.compaction_job_stats); - approx_size = std::move(o.approx_size); - grandparent_index = std::move(o.grandparent_index); - overlapped_bytes = std::move(o.overlapped_bytes); - seen_key = std::move(o.seen_key); - return *this; + // Adds the key and value to the builder; the pair is first run through + // the output validator, which checks key order and, when enabled, folds + // it into the paranoid hash + Status AddToBuilder(const Slice& key, const Slice& value) { + auto curr = current_output(); + assert(builder != nullptr); + assert(curr != nullptr); + Status s = curr->validator.Add(key, value); + if (!s.ok()) { + return s; + } + builder->Add(key, value); + return Status::OK(); } - // Because member std::unique_ptrs do not have these. - SubcompactionState(const SubcompactionState&) = delete; - - SubcompactionState& operator=(const SubcompactionState&) = delete; + void FillFilesToCutForTtl();
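A remark on the constructor change above: the hand-written move constructor and move-assignment, which had to enumerate every member and were a hazard whenever a field was added, are dropped in favour of default member initializers (e.g. total_bytes = 0) plus compiler-generated moves. A generic sketch of that pattern, with hypothetical names:

    #include <cstdint>
    #include <memory>
    #include <type_traits>
    #include <utility>
    #include <vector>

    struct SubState {
      std::vector<int> outputs;
      std::unique_ptr<int> builder;  // move-only, so copies are implicitly gone
      uint64_t total_bytes = 0;      // initialized here, not in every ctor
      bool seen_key = false;

      explicit SubState(std::vector<int> o) : outputs(std::move(o)) {}
      // No user-written move operations: the compiler-generated ones move
      // every member, so a newly added field can never be forgotten.
    };

    static_assert(std::is_move_constructible<SubState>::value,
                  "moves come for free");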
// Returns true iff we should stop building the current output // before processing "internal_key". @@ -205,6 +228,7 @@ &compaction->column_family_data()->internal_comparator(); const std::vector<FileMetaData*>& grandparents = compaction->grandparents(); + bool grandparent_file_switched = false; // Scan to find earliest grandparent file that contains key. while (grandparent_index < grandparents.size() && icmp->Compare(internal_key, @@ -212,6 +236,7 @@ 0) { if (seen_key) { overlapped_bytes += grandparents[grandparent_index]->fd.GetFileSize(); + grandparent_file_switched = true; } assert(grandparent_index + 1 >= grandparents.size() || icmp->Compare( @@ -221,17 +246,99 @@ } seen_key = true; - if (overlapped_bytes + curr_file_size > - compaction->max_compaction_bytes()) { + if (grandparent_file_switched && overlapped_bytes + curr_file_size > + compaction->max_compaction_bytes()) { // Too much overlap for current output; start new output overlapped_bytes = 0; return true; } + if (!files_to_cut_for_ttl.empty()) { + if (cur_files_to_cut_for_ttl != -1) { + // Previous key is inside the range of a file + if (icmp->Compare(internal_key, + files_to_cut_for_ttl[cur_files_to_cut_for_ttl] + ->largest.Encode()) > 0) { + next_files_to_cut_for_ttl = cur_files_to_cut_for_ttl + 1; + cur_files_to_cut_for_ttl = -1; + return true; + } + } else { + // Look for the key position + while (next_files_to_cut_for_ttl < + static_cast<int>(files_to_cut_for_ttl.size())) { + if (icmp->Compare(internal_key, + files_to_cut_for_ttl[next_files_to_cut_for_ttl] + ->smallest.Encode()) >= 0) { + if (icmp->Compare(internal_key, + files_to_cut_for_ttl[next_files_to_cut_for_ttl] + ->largest.Encode()) <= 0) { + // Within the current file + cur_files_to_cut_for_ttl = next_files_to_cut_for_ttl; + return true; + } + // Beyond the current file + next_files_to_cut_for_ttl++; + } else { + // Still falls into the gap + break; + } + } + } + } + + return false; } + + Status ProcessOutFlowIfNeeded(const Slice& key, const Slice& value) { + if (!blob_garbage_meter) { + return Status::OK(); + } + + return blob_garbage_meter->ProcessOutFlow(key, value); + } }; +void CompactionJob::SubcompactionState::FillFilesToCutForTtl() { + if (compaction->immutable_options()->compaction_style != + CompactionStyle::kCompactionStyleLevel || + compaction->immutable_options()->compaction_pri != + CompactionPri::kMinOverlappingRatio || + compaction->mutable_cf_options()->ttl == 0 || + compaction->num_input_levels() < 2 || compaction->bottommost_level()) { + return; + } + + // We define a new file as one whose oldest ancestor time is younger than + // 1/4 of the TTL, and an old one as older than 1/2 of the TTL. + int64_t temp_current_time; + auto get_time_status = compaction->immutable_options()->clock->GetCurrentTime( + &temp_current_time); + if (!get_time_status.ok()) { + return; + } + uint64_t current_time = static_cast<uint64_t>(temp_current_time); + if (current_time < compaction->mutable_cf_options()->ttl) { + return; + } + uint64_t old_age_thres = + current_time - compaction->mutable_cf_options()->ttl / 2; + + const std::vector<FileMetaData*>& olevel = + *(compaction->inputs(compaction->num_input_levels() - 1)); + for (FileMetaData* file : olevel) { + // Worth filtering out by start and end? + uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime(); + // We only pick old files that are not too small, to prevent a flood + // of small files.
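To make those thresholds concrete before the age test completes in the if just below: a file qualifies for cutting only when its oldest ancestor time falls behind current_time - ttl/2 and the file is at least half of target_file_size_base, the size guard being what prevents the flood of small files the comment mentions. A worked example with invented numbers:

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint64_t current_time = 1'700'000'000;   // seconds since epoch
      const uint64_t ttl = 86'400 * 30;               // 30 days
      const uint64_t target_file_size_base = 64ULL << 20;  // 64 MB

      const uint64_t old_age_thres = current_time - ttl / 2;  // older than 15 days

      // A 48 MB file whose oldest ancestor time is 20 days in the past:
      const uint64_t oldest_ancester_time = current_time - 86'400 * 20;
      const uint64_t file_size = 48ULL << 20;

      const bool cut = oldest_ancester_time < old_age_thres &&
                       file_size > target_file_size_base / 2;
      std::cout << (cut ? "isolate for TTL merge-down\n" : "leave in place\n");
    }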
+ if (oldest_ancester_time < old_age_thres && + file->fd.GetFileSize() > + compaction->mutable_cf_options()->target_file_size_base / 2) { + files_to_cut_for_ttl.push_back(file); + } + } +} + // Maintains state for the entire compaction struct CompactionJob::CompactionState { Compaction* const compaction; @@ -241,21 +348,13 @@ std::vector sub_compact_states; Status status; - uint64_t total_bytes; - uint64_t num_output_records; - - explicit CompactionState(Compaction* c) - : compaction(c), - total_bytes(0), - num_output_records(0) {} + size_t num_output_files = 0; + uint64_t total_bytes = 0; + size_t num_blob_output_files = 0; + uint64_t total_blob_bytes = 0; + uint64_t num_output_records = 0; - size_t NumOutputFiles() { - size_t total = 0; - for (auto& s : sub_compact_states) { - total += s.outputs.size(); - } - return total; - } + explicit CompactionState(Compaction* c) : compaction(c) {} Slice SmallestUserKey() { for (const auto& sub_compact_state : sub_compact_states) { @@ -282,49 +381,78 @@ }; void CompactionJob::AggregateStatistics() { + assert(compact_); + for (SubcompactionState& sc : compact_->sub_compact_states) { + auto& outputs = sc.outputs; + + if (!outputs.empty() && !outputs.back().meta.fd.file_size) { + // An error occurred, so ignore the last output. + outputs.pop_back(); + } + + compact_->num_output_files += outputs.size(); compact_->total_bytes += sc.total_bytes; - compact_->num_output_records += sc.num_output_records; - } - if (compaction_job_stats_) { - for (SubcompactionState& sc : compact_->sub_compact_states) { - compaction_job_stats_->Add(sc.compaction_job_stats); + + const auto& blobs = sc.blob_file_additions; + + compact_->num_blob_output_files += blobs.size(); + + for (const auto& blob : blobs) { + compact_->total_blob_bytes += blob.GetTotalBlobBytes(); } + + compact_->num_output_records += sc.num_output_records; + + compaction_job_stats_->Add(sc.compaction_job_stats); } } CompactionJob::CompactionJob( int job_id, Compaction* compaction, const ImmutableDBOptions& db_options, - const FileOptions& file_options, VersionSet* versions, - const std::atomic* shutting_down, + const MutableDBOptions& mutable_db_options, const FileOptions& file_options, + VersionSet* versions, const std::atomic* shutting_down, const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer, - Directory* db_directory, Directory* output_directory, Statistics* stats, + FSDirectory* db_directory, FSDirectory* output_directory, + FSDirectory* blob_output_directory, Statistics* stats, InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler, std::vector existing_snapshots, SequenceNumber earliest_write_conflict_snapshot, const SnapshotChecker* snapshot_checker, std::shared_ptr table_cache, EventLogger* event_logger, bool paranoid_file_checks, bool measure_io_stats, const std::string& dbname, CompactionJobStats* compaction_job_stats, - Env::Priority thread_pri, const std::atomic* manual_compaction_paused) - : job_id_(job_id), - compact_(new CompactionState(compaction)), - compaction_job_stats_(compaction_job_stats), + Env::Priority thread_pri, const std::shared_ptr& io_tracer, + const std::atomic* manual_compaction_paused, + const std::atomic* manual_compaction_canceled, + const std::string& db_id, const std::string& db_session_id, + std::string full_history_ts_low, BlobFileCompletionCallback* blob_callback) + : compact_(new CompactionState(compaction)), compaction_stats_(compaction->compaction_reason(), 1), - dbname_(dbname), db_options_(db_options), + 
mutable_db_options_copy_(mutable_db_options), + log_buffer_(log_buffer), + output_directory_(output_directory), + stats_(stats), + bottommost_level_(false), + write_hint_(Env::WLTH_NOT_SET), + job_id_(job_id), + compaction_job_stats_(compaction_job_stats), + dbname_(dbname), + db_id_(db_id), + db_session_id_(db_session_id), file_options_(file_options), env_(db_options.env), - fs_(db_options.fs.get()), + io_tracer_(io_tracer), + fs_(db_options.fs, io_tracer), file_options_for_read_( fs_->OptimizeForCompactionTableRead(file_options, db_options_)), versions_(versions), shutting_down_(shutting_down), manual_compaction_paused_(manual_compaction_paused), + manual_compaction_canceled_(manual_compaction_canceled), preserve_deletes_seqnum_(preserve_deletes_seqnum), - log_buffer_(log_buffer), db_directory_(db_directory), - output_directory_(output_directory), - stats_(stats), + blob_output_directory_(blob_output_directory), db_mutex_(db_mutex), db_error_handler_(db_error_handler), existing_snapshots_(std::move(existing_snapshots)), @@ -332,11 +460,12 @@ snapshot_checker_(snapshot_checker), table_cache_(std::move(table_cache)), event_logger_(event_logger), - bottommost_level_(false), paranoid_file_checks_(paranoid_file_checks), measure_io_stats_(measure_io_stats), - write_hint_(Env::WLTH_NOT_SET), - thread_pri_(thread_pri) { + thread_pri_(thread_pri), + full_history_ts_low_(std::move(full_history_ts_low)), + blob_callback_(blob_callback) { + assert(compaction_job_stats_ != nullptr); assert(log_buffer_ != nullptr); const auto* cfd = compact_->compaction->column_family_data(); ThreadStatusUtil::SetColumnFamily(cfd, cfd->ioptions()->env, @@ -388,17 +517,16 @@ // to ensure GetThreadList() can always show them all together. ThreadStatusUtil::SetThreadOperation(ThreadStatus::OP_COMPACTION); - if (compaction_job_stats_) { - compaction_job_stats_->is_manual_compaction = - compaction->is_manual_compaction(); - } + compaction_job_stats_->is_manual_compaction = + compaction->is_manual_compaction(); + compaction_job_stats_->is_full_compaction = compaction->is_full_compaction(); } void CompactionJob::Prepare() { AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_PREPARE); - // Generate file_levels_ for compaction berfore making Iterator + // Generate file_levels_ for compaction before making Iterator auto* c = compact_->compaction; assert(c->column_family_data() != nullptr); assert(c->column_family_data()->current()->storage_info()->NumLevelFiles( @@ -410,7 +538,7 @@ if (c->ShouldFormSubcompactions()) { { - StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME); + StopWatch sw(db_options_.clock, stats_, SUBCOMPACTION_SETUP_TIME); GenSubcompactionBoundaries(); } assert(sizes_.size() == boundaries_.size() + 1); @@ -418,12 +546,18 @@ for (size_t i = 0; i <= boundaries_.size(); i++) { Slice* start = i == 0 ? nullptr : &boundaries_[i - 1]; Slice* end = i == boundaries_.size() ? 
nullptr : &boundaries_[i]; - compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i]); + compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i], + static_cast(i)); } RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED, compact_->sub_compact_states.size()); } else { - compact_->sub_compact_states.emplace_back(c, nullptr, nullptr); + constexpr Slice* start = nullptr; + constexpr Slice* end = nullptr; + constexpr uint64_t size = 0; + + compact_->sub_compact_states.emplace_back(c, start, end, size, + /*sub_job_id*/ 0); } } @@ -529,9 +663,10 @@ int base_level = v->storage_info()->base_level(); uint64_t max_output_files = static_cast(std::ceil( sum / min_file_fill_percent / - MaxFileSizeForLevel(*(c->mutable_cf_options()), out_lvl, - c->immutable_cf_options()->compaction_style, base_level, - c->immutable_cf_options()->level_compaction_dynamic_level_bytes))); + MaxFileSizeForLevel( + *(c->mutable_cf_options()), out_lvl, + c->immutable_options()->compaction_style, base_level, + c->immutable_options()->level_compaction_dynamic_level_bytes))); uint64_t subcompactions = std::min({static_cast(ranges.size()), static_cast(c->max_subcompactions()), @@ -542,7 +677,7 @@ // Greedily add ranges to the subcompaction until the sum of the ranges' // sizes becomes >= the expected mean size of a subcompaction sum = 0; - for (size_t i = 0; i < ranges.size() - 1; i++) { + for (size_t i = 0; i + 1 < ranges.size(); i++) { sum += ranges[i].size; if (subcompactions == 1) { // If there's only one left to schedule then it goes to the end so no @@ -572,7 +707,7 @@ const size_t num_threads = compact_->sub_compact_states.size(); assert(num_threads > 0); - const uint64_t start_micros = env_->NowMicros(); + const uint64_t start_micros = db_options_.clock->NowMicros(); // Launch a thread for each of subcompactions 1...num_threads-1 std::vector thread_pool; @@ -591,7 +726,7 @@ thread.join(); } - compaction_stats_.micros = env_->NowMicros() - start_micros; + compaction_stats_.micros = db_options_.clock->NowMicros() - start_micros; compaction_stats_.cpu_micros = 0; for (size_t i = 0; i < compact_->sub_compact_states.size(); i++) { compaction_stats_.cpu_micros += @@ -606,33 +741,62 @@ // Check if any thread encountered an error during execution Status status; + IOStatus io_s; + bool wrote_new_blob_files = false; + for (const auto& state : compact_->sub_compact_states) { if (!state.status.ok()) { status = state.status; + io_s = state.io_status; break; } + + if (!state.blob_file_additions.empty()) { + wrote_new_blob_files = true; + } } - if (status.ok() && output_directory_) { - status = output_directory_->Fsync(); + if (io_status_.ok()) { + io_status_ = io_s; } + if (status.ok()) { + constexpr IODebugContext* dbg = nullptr; + + if (output_directory_) { + io_s = output_directory_->FsyncWithDirOptions( + IOOptions(), dbg, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced)); + } + if (io_s.ok() && wrote_new_blob_files && blob_output_directory_ && + blob_output_directory_ != output_directory_) { + io_s = blob_output_directory_->FsyncWithDirOptions( + IOOptions(), dbg, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced)); + } + } + if (io_status_.ok()) { + io_status_ = io_s; + } + if (status.ok()) { + status = io_s; + } if (status.ok()) { thread_pool.clear(); - std::vector files_meta; + std::vector files_output; for (const auto& state : compact_->sub_compact_states) { for (const auto& output : state.outputs) { - files_meta.emplace_back(&output.meta); + 
files_output.emplace_back(&output); } } ColumnFamilyData* cfd = compact_->compaction->column_family_data(); - auto prefix_extractor = - compact_->compaction->mutable_cf_options()->prefix_extractor.get(); - std::atomic next_file_meta_idx(0); + auto& prefix_extractor = + compact_->compaction->mutable_cf_options()->prefix_extractor; + std::atomic next_file_idx(0); auto verify_table = [&](Status& output_status) { while (true) { - size_t file_idx = next_file_meta_idx.fetch_add(1); - if (file_idx >= files_meta.size()) { + size_t file_idx = next_file_idx.fetch_add(1); + if (file_idx >= files_output.size()) { break; } // Verify that the table is usable @@ -641,21 +805,40 @@ // No matter whether use_direct_io_for_flush_and_compaction is true, // we will regard this verification as user reads since the goal is // to cache it here for further user reads + ReadOptions read_options; InternalIterator* iter = cfd->table_cache()->NewIterator( - ReadOptions(), file_options_, cfd->internal_comparator(), - *files_meta[file_idx], /*range_del_agg=*/nullptr, prefix_extractor, + read_options, file_options_, cfd->internal_comparator(), + files_output[file_idx]->meta, /*range_del_agg=*/nullptr, + prefix_extractor, /*table_reader_ptr=*/nullptr, cfd->internal_stats()->GetFileReadHist( compact_->compaction->output_level()), TableReaderCaller::kCompactionRefill, /*arena=*/nullptr, /*skip_filters=*/false, compact_->compaction->output_level(), + MaxFileSizeForL0MetaPin( + *compact_->compaction->mutable_cf_options()), /*smallest_compaction_key=*/nullptr, - /*largest_compaction_key=*/nullptr); + /*largest_compaction_key=*/nullptr, + /*allow_unprepared_value=*/false); auto s = iter->status(); if (s.ok() && paranoid_file_checks_) { - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {} - s = iter->status(); + OutputValidator validator(cfd->internal_comparator(), + /*_enable_order_check=*/true, + /*_enable_hash=*/true); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + s = validator.Add(iter->key(), iter->value()); + if (!s.ok()) { + break; + } + } + if (s.ok()) { + s = iter->status(); + } + if (s.ok() && + !validator.CompareValidator(files_output[file_idx]->validator)) { + s = Status::Corruption("Paranoid checksums do not match"); + } } delete iter; @@ -686,7 +869,7 @@ for (const auto& state : compact_->sub_compact_states) { for (const auto& output : state.outputs) { auto fn = - TableFileName(state.compaction->immutable_cf_options()->cf_paths, + TableFileName(state.compaction->immutable_options()->cf_paths, output.meta.fd.GetNumber(), output.meta.fd.GetPathId()); tp[fn] = output.table_properties; } @@ -696,6 +879,7 @@ // Finish up all book-keeping to unify the subcompaction results AggregateStatistics(); UpdateCompactionStats(); + RecordCompactionIOStats(); LogFlush(db_options_.info_log); TEST_SYNC_POINT("CompactionJob::Run():End"); @@ -705,17 +889,26 @@ } Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { + assert(compact_); + AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_INSTALL); db_mutex_->AssertHeld(); Status status = compact_->status; + ColumnFamilyData* cfd = compact_->compaction->column_family_data(); + assert(cfd); + cfd->internal_stats()->AddCompactionStats( compact_->compaction->output_level(), thread_pri_, compaction_stats_); if (status.ok()) { status = InstallCompactionResults(mutable_cf_options); } + if (!versions_->io_status().ok()) { + io_status_ = versions_->io_status(); + } + VersionStorageInfo::LevelSummaryStorage tmp; auto vstorage 
= cfd->current()->storage_info(); const auto& stats = compaction_stats_; @@ -725,63 +918,86 @@ double bytes_read_per_sec = 0; double bytes_written_per_sec = 0; - if (stats.bytes_read_non_output_levels > 0) { - read_write_amp = (stats.bytes_written + stats.bytes_read_output_level + - stats.bytes_read_non_output_levels) / - static_cast(stats.bytes_read_non_output_levels); - write_amp = stats.bytes_written / - static_cast(stats.bytes_read_non_output_levels); + const uint64_t bytes_read_non_output_and_blob = + stats.bytes_read_non_output_levels + stats.bytes_read_blob; + const uint64_t bytes_read_all = + stats.bytes_read_output_level + bytes_read_non_output_and_blob; + const uint64_t bytes_written_all = + stats.bytes_written + stats.bytes_written_blob; + + if (bytes_read_non_output_and_blob > 0) { + read_write_amp = (bytes_written_all + bytes_read_all) / + static_cast(bytes_read_non_output_and_blob); + write_amp = + bytes_written_all / static_cast(bytes_read_non_output_and_blob); } if (stats.micros > 0) { - bytes_read_per_sec = - (stats.bytes_read_non_output_levels + stats.bytes_read_output_level) / - static_cast(stats.micros); + bytes_read_per_sec = bytes_read_all / static_cast(stats.micros); bytes_written_per_sec = - stats.bytes_written / static_cast(stats.micros); + bytes_written_all / static_cast(stats.micros); } + const std::string& column_family_name = cfd->GetName(); + + constexpr double kMB = 1048576.0; + ROCKS_LOG_BUFFER( log_buffer_, "[%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, " - "files in(%d, %d) out(%d) " - "MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) " - "write-amplify(%.1f) %s, records in: %" PRIu64 + "files in(%d, %d) out(%d +%d blob) " + "MB in(%.1f, %.1f +%.1f blob) out(%.1f +%.1f blob), " + "read-write-amplify(%.1f) write-amplify(%.1f) %s, records in: %" PRIu64 ", records dropped: %" PRIu64 " output_compression: %s\n", - cfd->GetName().c_str(), vstorage->LevelSummary(&tmp), bytes_read_per_sec, - bytes_written_per_sec, compact_->compaction->output_level(), + column_family_name.c_str(), vstorage->LevelSummary(&tmp), + bytes_read_per_sec, bytes_written_per_sec, + compact_->compaction->output_level(), stats.num_input_files_in_non_output_levels, stats.num_input_files_in_output_level, stats.num_output_files, - stats.bytes_read_non_output_levels / 1048576.0, - stats.bytes_read_output_level / 1048576.0, - stats.bytes_written / 1048576.0, read_write_amp, write_amp, - status.ToString().c_str(), stats.num_input_records, + stats.num_output_files_blob, stats.bytes_read_non_output_levels / kMB, + stats.bytes_read_output_level / kMB, stats.bytes_read_blob / kMB, + stats.bytes_written / kMB, stats.bytes_written_blob / kMB, read_write_amp, + write_amp, status.ToString().c_str(), stats.num_input_records, stats.num_dropped_records, CompressionTypeToString(compact_->compaction->output_compression()) .c_str()); + const auto& blob_files = vstorage->GetBlobFiles(); + if (!blob_files.empty()) { + ROCKS_LOG_BUFFER(log_buffer_, + "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 + "\n", + column_family_name.c_str(), blob_files.begin()->first, + blob_files.rbegin()->first); + } + UpdateCompactionJobStats(stats); - auto stream = event_logger_->LogToBuffer(log_buffer_); + auto stream = event_logger_->LogToBuffer(log_buffer_, 8192); stream << "job" << job_id_ << "event" << "compaction_finished" << "compaction_time_micros" << stats.micros << "compaction_time_cpu_micros" << stats.cpu_micros << "output_level" << compact_->compaction->output_level() << "num_output_files" - 
<< compact_->NumOutputFiles() << "total_output_size" - << compact_->total_bytes << "num_input_records" - << stats.num_input_records << "num_output_records" - << compact_->num_output_records << "num_subcompactions" - << compact_->sub_compact_states.size() << "output_compression" - << CompressionTypeToString(compact_->compaction->output_compression()); + << compact_->num_output_files << "total_output_size" + << compact_->total_bytes; - if (compaction_job_stats_ != nullptr) { - stream << "num_single_delete_mismatches" - << compaction_job_stats_->num_single_del_mismatch; - stream << "num_single_delete_fallthrough" - << compaction_job_stats_->num_single_del_fallthru; + if (compact_->num_blob_output_files > 0) { + stream << "num_blob_output_files" << compact_->num_blob_output_files + << "total_blob_output_size" << compact_->total_blob_bytes; } - if (measure_io_stats_ && compaction_job_stats_ != nullptr) { + stream << "num_input_records" << stats.num_input_records + << "num_output_records" << compact_->num_output_records + << "num_subcompactions" << compact_->sub_compact_states.size() + << "output_compression" + << CompressionTypeToString(compact_->compaction->output_compression()); + + stream << "num_single_delete_mismatches" + << compaction_job_stats_->num_single_del_mismatch; + stream << "num_single_delete_fallthrough" + << compaction_job_stats_->num_single_del_fallthru; + + if (measure_io_stats_) { stream << "file_write_nanos" << compaction_job_stats_->file_write_nanos; stream << "file_range_sync_nanos" << compaction_job_stats_->file_range_sync_nanos; @@ -797,14 +1013,222 @@ } stream.EndArray(); + if (!blob_files.empty()) { + stream << "blob_file_head" << blob_files.begin()->first; + stream << "blob_file_tail" << blob_files.rbegin()->first; + } + CleanupCompaction(); return status; } +#ifndef ROCKSDB_LITE +CompactionServiceJobStatus +CompactionJob::ProcessKeyValueCompactionWithCompactionService( + SubcompactionState* sub_compact) { + assert(sub_compact); + assert(sub_compact->compaction); + assert(db_options_.compaction_service); + + const Compaction* compaction = sub_compact->compaction; + CompactionServiceInput compaction_input; + compaction_input.output_level = compaction->output_level(); + + const std::vector& inputs = + *(compact_->compaction->inputs()); + for (const auto& files_per_level : inputs) { + for (const auto& file : files_per_level.files) { + compaction_input.input_files.emplace_back( + MakeTableFileName(file->fd.GetNumber())); + } + } + compaction_input.column_family.name = + compaction->column_family_data()->GetName(); + compaction_input.column_family.options = + compaction->column_family_data()->GetLatestCFOptions(); + compaction_input.db_options = + BuildDBOptions(db_options_, mutable_db_options_copy_); + compaction_input.snapshots = existing_snapshots_; + compaction_input.has_begin = sub_compact->start; + compaction_input.begin = + compaction_input.has_begin ? sub_compact->start->ToString() : ""; + compaction_input.has_end = sub_compact->end; + compaction_input.end = + compaction_input.has_end ? sub_compact->end->ToString() : ""; + compaction_input.approx_size = sub_compact->approx_size; + + std::string compaction_input_binary; + Status s = compaction_input.Write(&compaction_input_binary); + if (!s.ok()) { + sub_compact->status = s; + return CompactionServiceJobStatus::kFailure; + } + + std::ostringstream input_files_oss; + bool is_first_one = true; + for (const auto& file : compaction_input.input_files) { + input_files_oss << (is_first_one ? 
"" : ", ") << file; + is_first_one = false; + } + + ROCKS_LOG_INFO( + db_options_.info_log, + "[%s] [JOB %d] Starting remote compaction (output level: %d): %s", + compaction_input.column_family.name.c_str(), job_id_, + compaction_input.output_level, input_files_oss.str().c_str()); + CompactionServiceJobInfo info(dbname_, db_id_, db_session_id_, + GetCompactionId(sub_compact), thread_pri_); + CompactionServiceJobStatus compaction_status = + db_options_.compaction_service->StartV2(info, compaction_input_binary); + switch (compaction_status) { + case CompactionServiceJobStatus::kSuccess: + break; + case CompactionServiceJobStatus::kFailure: + sub_compact->status = Status::Incomplete( + "CompactionService failed to start compaction job."); + ROCKS_LOG_WARN(db_options_.info_log, + "[%s] [JOB %d] Remote compaction failed to start.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + case CompactionServiceJobStatus::kUseLocal: + ROCKS_LOG_INFO( + db_options_.info_log, + "[%s] [JOB %d] Remote compaction fallback to local by API Start.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + default: + assert(false); // unknown status + break; + } + + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Waiting for remote compaction...", + compaction_input.column_family.name.c_str(), job_id_); + std::string compaction_result_binary; + compaction_status = db_options_.compaction_service->WaitForCompleteV2( + info, &compaction_result_binary); + + if (compaction_status == CompactionServiceJobStatus::kUseLocal) { + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Remote compaction fallback to local by API " + "WaitForComplete.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + } + + CompactionServiceResult compaction_result; + s = CompactionServiceResult::Read(compaction_result_binary, + &compaction_result); + + if (compaction_status == CompactionServiceJobStatus::kFailure) { + if (s.ok()) { + if (compaction_result.status.ok()) { + sub_compact->status = Status::Incomplete( + "CompactionService failed to run the compaction job (even though " + "the internal status is okay)."); + } else { + // set the current sub compaction status with the status returned from + // remote + sub_compact->status = compaction_result.status; + } + } else { + sub_compact->status = Status::Incomplete( + "CompactionService failed to run the compaction job (and no valid " + "result is returned)."); + compaction_result.status.PermitUncheckedError(); + } + ROCKS_LOG_WARN(db_options_.info_log, + "[%s] [JOB %d] Remote compaction failed.", + compaction_input.column_family.name.c_str(), job_id_); + return compaction_status; + } + + if (!s.ok()) { + sub_compact->status = s; + compaction_result.status.PermitUncheckedError(); + return CompactionServiceJobStatus::kFailure; + } + sub_compact->status = compaction_result.status; + + std::ostringstream output_files_oss; + is_first_one = true; + for (const auto& file : compaction_result.output_files) { + output_files_oss << (is_first_one ? 
"" : ", ") << file.file_name; + is_first_one = false; + } + + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Receive remote compaction result, output path: " + "%s, files: %s", + compaction_input.column_family.name.c_str(), job_id_, + compaction_result.output_path.c_str(), + output_files_oss.str().c_str()); + + if (!s.ok()) { + sub_compact->status = s; + return CompactionServiceJobStatus::kFailure; + } + + for (const auto& file : compaction_result.output_files) { + uint64_t file_num = versions_->NewFileNumber(); + auto src_file = compaction_result.output_path + "/" + file.file_name; + auto tgt_file = TableFileName(compaction->immutable_options()->cf_paths, + file_num, compaction->output_path_id()); + s = fs_->RenameFile(src_file, tgt_file, IOOptions(), nullptr); + if (!s.ok()) { + sub_compact->status = s; + return CompactionServiceJobStatus::kFailure; + } + + FileMetaData meta; + uint64_t file_size; + s = fs_->GetFileSize(tgt_file, IOOptions(), &file_size, nullptr); + if (!s.ok()) { + sub_compact->status = s; + return CompactionServiceJobStatus::kFailure; + } + meta.fd = FileDescriptor(file_num, compaction->output_path_id(), file_size, + file.smallest_seqno, file.largest_seqno); + meta.smallest.DecodeFrom(file.smallest_internal_key); + meta.largest.DecodeFrom(file.largest_internal_key); + meta.oldest_ancester_time = file.oldest_ancester_time; + meta.file_creation_time = file.file_creation_time; + meta.marked_for_compaction = file.marked_for_compaction; + + auto cfd = compaction->column_family_data(); + sub_compact->outputs.emplace_back(std::move(meta), + cfd->internal_comparator(), false, false, + true, file.paranoid_hash); + } + sub_compact->compaction_job_stats = compaction_result.stats; + sub_compact->num_output_records = compaction_result.num_output_records; + sub_compact->approx_size = compaction_input.approx_size; // is this used? + sub_compact->total_bytes = compaction_result.total_bytes; + RecordTick(stats_, REMOTE_COMPACT_READ_BYTES, compaction_result.bytes_read); + RecordTick(stats_, REMOTE_COMPACT_WRITE_BYTES, + compaction_result.bytes_written); + return CompactionServiceJobStatus::kSuccess; +} +#endif // !ROCKSDB_LITE + void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { - assert(sub_compact != nullptr); + assert(sub_compact); + assert(sub_compact->compaction); - uint64_t prev_cpu_micros = env_->NowCPUNanos() / 1000; +#ifndef ROCKSDB_LITE + if (db_options_.compaction_service) { + CompactionServiceJobStatus comp_status = + ProcessKeyValueCompactionWithCompactionService(sub_compact); + if (comp_status == CompactionServiceJobStatus::kSuccess || + comp_status == CompactionServiceJobStatus::kFailure) { + return; + } + // fallback to local compaction + assert(comp_status == CompactionServiceJobStatus::kUseLocal); + } +#endif // !ROCKSDB_LITE + + uint64_t prev_cpu_micros = db_options_.clock->CPUMicros(); ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); @@ -828,10 +1252,63 @@ CompactionRangeDelAggregator range_del_agg(&cfd->internal_comparator(), existing_snapshots_); + const Slice* const start = sub_compact->start; + const Slice* const end = sub_compact->end; + + ReadOptions read_options; + read_options.verify_checksums = true; + read_options.fill_cache = false; + // Compaction iterators shouldn't be confined to a single prefix. + // Compactions use Seek() for + // (a) concurrent compactions, + // (b) CompactionFilter::Decision::kRemoveAndSkipUntil. 
+ read_options.total_order_seek = true; + + // Note: if we're going to support subcompactions for user-defined timestamps, + // the timestamp part will have to be stripped from the bounds here. + assert((!start && !end) || cfd->user_comparator()->timestamp_size() == 0); + read_options.iterate_lower_bound = start; + read_options.iterate_upper_bound = end; + // Although the v2 aggregator is what the level iterator(s) know about, // the AddTombstones calls will be propagated down to the v1 aggregator. - std::unique_ptr input(versions_->MakeInputIterator( - sub_compact->compaction, &range_del_agg, file_options_for_read_)); + std::unique_ptr raw_input( + versions_->MakeInputIterator(read_options, sub_compact->compaction, + &range_del_agg, file_options_for_read_)); + InternalIterator* input = raw_input.get(); + + IterKey start_ikey; + IterKey end_ikey; + Slice start_slice; + Slice end_slice; + + if (start) { + start_ikey.SetInternalKey(*start, kMaxSequenceNumber, kValueTypeForSeek); + start_slice = start_ikey.GetInternalKey(); + } + if (end) { + end_ikey.SetInternalKey(*end, kMaxSequenceNumber, kValueTypeForSeek); + end_slice = end_ikey.GetInternalKey(); + } + + std::unique_ptr clip; + if (start || end) { + clip.reset(new ClippingIterator( + raw_input.get(), start ? &start_slice : nullptr, + end ? &end_slice : nullptr, &cfd->internal_comparator())); + input = clip.get(); + } + + std::unique_ptr blob_counter; + + if (sub_compact->compaction->DoesInputReferenceBlobFiles()) { + sub_compact->blob_garbage_meter.reset(new BlobGarbageMeter); + blob_counter.reset( + new BlobCountingIterator(input, sub_compact->blob_garbage_meter.get())); + input = blob_counter.get(); + } + + input->SeekToFirst(); AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_COMPACTION_PROCESS_KV); @@ -857,40 +1334,51 @@ } MergeHelper merge( - env_, cfd->user_comparator(), cfd->ioptions()->merge_operator, + env_, cfd->user_comparator(), cfd->ioptions()->merge_operator.get(), compaction_filter, db_options_.info_log.get(), false /* internal key corruption is expected */, existing_snapshots_.empty() ? 0 : existing_snapshots_.back(), - snapshot_checker_, compact_->compaction->level(), - db_options_.statistics.get()); + snapshot_checker_, compact_->compaction->level(), db_options_.stats); + + const MutableCFOptions* mutable_cf_options = + sub_compact->compaction->mutable_cf_options(); + assert(mutable_cf_options); + + std::vector blob_file_paths; + + std::unique_ptr blob_file_builder( + mutable_cf_options->enable_blob_files + ? new BlobFileBuilder( + versions_, fs_.get(), + sub_compact->compaction->immutable_options(), + mutable_cf_options, &file_options_, job_id_, cfd->GetID(), + cfd->GetName(), Env::IOPriority::IO_LOW, write_hint_, + io_tracer_, blob_callback_, BlobFileCreationReason::kCompaction, + &blob_file_paths, &sub_compact->blob_file_additions) + : nullptr); TEST_SYNC_POINT("CompactionJob::Run():Inprogress"); TEST_SYNC_POINT_CALLBACK( "CompactionJob::Run():PausingManualCompaction:1", reinterpret_cast( - const_cast*>(manual_compaction_paused_))); - - Slice* start = sub_compact->start; - Slice* end = sub_compact->end; - if (start != nullptr) { - IterKey start_iter; - start_iter.SetInternalKey(*start, kMaxSequenceNumber, kValueTypeForSeek); - input->Seek(start_iter.GetInternalKey()); - } else { - input->SeekToFirst(); - } + const_cast*>(manual_compaction_paused_))); Status status; + const std::string* const full_history_ts_low = + full_history_ts_low_.empty() ? 
nullptr : &full_history_ts_low_; sub_compact->c_iter.reset(new CompactionIterator( - input.get(), cfd->user_comparator(), &merge, versions_->LastSequence(), + input, cfd->user_comparator(), &merge, versions_->LastSequence(), &existing_snapshots_, earliest_write_conflict_snapshot_, - snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_), false, - &range_del_agg, sub_compact->compaction, compaction_filter, - shutting_down_, preserve_deletes_seqnum_, manual_compaction_paused_, - db_options_.info_log)); + snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_), + /*expect_valid_internal_key=*/true, &range_del_agg, + blob_file_builder.get(), db_options_.allow_data_in_errors, + sub_compact->compaction, compaction_filter, shutting_down_, + preserve_deletes_seqnum_, manual_compaction_paused_, + manual_compaction_canceled_, db_options_.info_log, full_history_ts_low)); auto c_iter = sub_compact->c_iter.get(); c_iter->SeekToFirst(); if (c_iter->Valid() && sub_compact->compaction->output_level() != 0) { + sub_compact->FillFilesToCutForTtl(); // ShouldStopBefore() maintains state based on keys processed so far. The // compaction loop always calls it on the "next" key, thus won't tell it the // first key. So we do that here. @@ -899,18 +1387,21 @@ } const auto& c_iter_stats = c_iter->iter_stats(); + std::unique_ptr partitioner = + sub_compact->compaction->output_level() == 0 + ? nullptr + : sub_compact->compaction->CreateSstPartitioner(); + std::string last_key_for_partitioner; + while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) { // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid() // returns true. const Slice& key = c_iter->key(); const Slice& value = c_iter->value(); - // If an end key (exclusive) is specified, check if the current key is - // >= than it and exit if it is because the iterator is out of its range - if (end != nullptr && - cfd->user_comparator()->Compare(c_iter->user_key(), *end) >= 0) { - break; - } + assert(!end || + cfd->user_comparator()->Compare(c_iter->user_key(), *end) < 0); + if (c_iter_stats.num_input_records % kRecordStatsEvery == kRecordStatsEvery - 1) { RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats); @@ -925,10 +1416,18 @@ break; } } - assert(sub_compact->builder != nullptr); - assert(sub_compact->current_output() != nullptr); - sub_compact->builder->Add(key, value); - sub_compact->current_output_file_size = sub_compact->builder->FileSize(); + status = sub_compact->AddToBuilder(key, value); + if (!status.ok()) { + break; + } + + status = sub_compact->ProcessOutFlowIfNeeded(key, value); + if (!status.ok()) { + break; + } + + sub_compact->current_output_file_size = + sub_compact->builder->EstimatedFileSize(); const ParsedInternalKey& ikey = c_iter->ikey(); sub_compact->current_output()->meta.UpdateBoundaries( key, value, ikey.sequence, ikey.type); @@ -943,33 +1442,39 @@ // going to be 1.2MB and max_output_file_size = 1MB, prefer to have 0.6MB // and 0.6MB instead of 1MB and 0.2MB) bool output_file_ended = false; - Status input_status; if (sub_compact->compaction->output_level() != 0 && sub_compact->current_output_file_size >= sub_compact->compaction->max_output_file_size()) { // (1) this key terminates the file. For historical reasons, the iterator // status before advancing will be given to FinishCompactionOutputFile(). 
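Picking up just before the removed input_status lines below: the key loop now also consults an optional SstPartitioner, remembering the previous user key after each advance and asking whether the (previous, current) pair must start a new output file. A toy stand-in for such a policy (interface heavily simplified; only the kRequired answer mirrors the hunk):

    #include <iostream>
    #include <string>

    enum PartitionerResult { kNotRequired, kRequired };

    // Hypothetical policy: cut a new file when the first byte (the "prefix")
    // of the user key changes between the previous and the current key.
    PartitionerResult ShouldPartition(const std::string& prev,
                                      const std::string& current) {
      if (!prev.empty() && !current.empty() && prev[0] != current[0])
        return kRequired;
      return kNotRequired;
    }

    int main() {
      std::cout << ShouldPartition("apple", "avocado") << "\n";   // 0: same file
      std::cout << ShouldPartition("avocado", "banana") << "\n";  // 1: cut here
    }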
- input_status = input->status(); output_file_ended = true; } TEST_SYNC_POINT_CALLBACK( "CompactionJob::Run():PausingManualCompaction:2", reinterpret_cast( - const_cast*>(manual_compaction_paused_))); + const_cast*>(manual_compaction_paused_))); + if (partitioner.get()) { + last_key_for_partitioner.assign(c_iter->user_key().data_, + c_iter->user_key().size_); + } c_iter->Next(); if (c_iter->status().IsManualCompactionPaused()) { break; } - if (!output_file_ended && c_iter->Valid() && - sub_compact->compaction->output_level() != 0 && - sub_compact->ShouldStopBefore(c_iter->key(), - sub_compact->current_output_file_size) && - sub_compact->builder != nullptr) { - // (2) this key belongs to the next file. For historical reasons, the - // iterator status after advancing will be given to - // FinishCompactionOutputFile(). - input_status = input->status(); - output_file_ended = true; + if (!output_file_ended && c_iter->Valid()) { + if (((partitioner.get() && + partitioner->ShouldPartition(PartitionerRequest( + last_key_for_partitioner, c_iter->user_key(), + sub_compact->current_output_file_size)) == kRequired) || + (sub_compact->compaction->output_level() != 0 && + sub_compact->ShouldStopBefore( + c_iter->key(), sub_compact->current_output_file_size))) && + sub_compact->builder != nullptr) { + // (2) this key belongs to the next file. For historical reasons, the + // iterator status after advancing will be given to + // FinishCompactionOutputFile(). + output_file_ended = true; + } } if (output_file_ended) { const Slice* next_key = nullptr; @@ -977,14 +1482,18 @@ next_key = &c_iter->key(); } CompactionIterationStats range_del_out_stats; - status = - FinishCompactionOutputFile(input_status, sub_compact, &range_del_agg, - &range_del_out_stats, next_key); + status = FinishCompactionOutputFile(input->status(), sub_compact, + &range_del_agg, &range_del_out_stats, + next_key); RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats); } } + sub_compact->compaction_job_stats.num_blobs_read = + c_iter_stats.num_blobs_read; + sub_compact->compaction_job_stats.total_blob_bytes_read = + c_iter_stats.total_blob_bytes_read; sub_compact->compaction_job_stats.num_input_deletion_records = c_iter_stats.num_input_deletion_records; sub_compact->compaction_job_stats.num_corrupt_keys = @@ -1000,6 +1509,16 @@ RecordTick(stats_, FILTER_OPERATION_TOTAL_TIME, c_iter_stats.total_filter_time); + + if (c_iter_stats.num_blobs_relocated > 0) { + RecordTick(stats_, BLOB_DB_GC_NUM_KEYS_RELOCATED, + c_iter_stats.num_blobs_relocated); + } + if (c_iter_stats.total_blob_bytes_relocated > 0) { + RecordTick(stats_, BLOB_DB_GC_BYTES_RELOCATED, + c_iter_stats.total_blob_bytes_relocated); + } + RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats); RecordCompactionIOStats(); @@ -1012,8 +1531,10 @@ status = Status::ShutdownInProgress("Database shutdown"); } if ((status.ok() || status.IsColumnFamilyDropped()) && - (manual_compaction_paused_ && - manual_compaction_paused_->load(std::memory_order_relaxed))) { + ((manual_compaction_paused_ && + manual_compaction_paused_->load(std::memory_order_relaxed) > 0) || + (manual_compaction_canceled_ && + manual_compaction_canceled_->load(std::memory_order_relaxed)))) { status = Status::Incomplete(Status::SubCode::kManualCompactionPaused); } if (status.ok()) { @@ -1035,14 +1556,23 @@ CompactionIterationStats range_del_out_stats; Status s = FinishCompactionOutputFile(status, sub_compact, &range_del_agg, &range_del_out_stats); - if (status.ok()) { + if (!s.ok() && 
status.ok()) { status = s; } RecordDroppedKeys(range_del_out_stats, &sub_compact->compaction_job_stats); } + if (blob_file_builder) { + if (status.ok()) { + status = blob_file_builder->Finish(); + } else { + blob_file_builder->Abandon(status); + } + blob_file_builder.reset(); + } + sub_compact->compaction_job_stats.cpu_micros = - env_->NowCPUNanos() / 1000 - prev_cpu_micros; + db_options_.clock->CPUMicros() - prev_cpu_micros; if (measure_io_stats_) { sub_compact->compaction_job_stats.file_write_nanos += @@ -1061,12 +1591,28 @@ SetPerfLevel(prev_perf_level); } } +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + if (!status.ok()) { + if (sub_compact->c_iter) { + sub_compact->c_iter->status().PermitUncheckedError(); + } + if (input) { + input->status().PermitUncheckedError(); + } + } +#endif // ROCKSDB_ASSERT_STATUS_CHECKED sub_compact->c_iter.reset(); - input.reset(); + blob_counter.reset(); + clip.reset(); + raw_input.reset(); sub_compact->status = status; } +uint64_t CompactionJob::GetCompactionId(SubcompactionState* sub_compact) { + return (uint64_t)job_id_ << 32 | sub_compact->sub_job_id; +} + void CompactionJob::RecordDroppedKeys( const CompactionIterationStats& c_iter_stats, CompactionJobStats* compaction_job_stats) { @@ -1121,6 +1667,8 @@ ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); const Comparator* ucmp = cfd->user_comparator(); + std::string file_checksum = kUnknownFileChecksum; + std::string file_checksum_func_name = kUnknownFileChecksumFuncName; // Check for iterator errors Status s = input_status; @@ -1194,6 +1742,7 @@ } else { it->SeekToFirst(); } + TEST_SYNC_POINT("CompactionJob::FinishCompactionOutputFile1"); for (; it->Valid(); it->Next()) { auto tombstone = it->Tombstone(); if (upper_bound != nullptr) { @@ -1221,6 +1770,7 @@ auto kv = tombstone.Serialize(); assert(lower_bound == nullptr || ucmp->Compare(*lower_bound, kv.second) < 0); + // Range tombstone is not supported by output validator yet. sub_compact->builder->Add(kv.first.Encode(), kv.second); InternalKey smallest_candidate = std::move(kv.first); if (lower_bound != nullptr && @@ -1277,7 +1827,6 @@ meta->UpdateBoundariesForRange(smallest_candidate, largest_candidate, tombstone.seq_, cfd->internal_comparator()); - // The smallest key in a file is used for range tombstone truncation, so // it cannot have a seqnum of 0 (unless the smallest data key in a file // has a seqnum of 0). Otherwise, the truncated tombstone may expose @@ -1286,7 +1835,6 @@ ExtractInternalKeyFooter(meta->smallest.Encode()) != PackSequenceAndType(0, kTypeRangeDeletion)); } - meta->marked_for_compaction = sub_compact->builder->NeedCompact(); } const uint64_t current_entries = sub_compact->builder->NumEntries(); if (s.ok()) { @@ -1294,25 +1842,59 @@ } else { sub_compact->builder->Abandon(); } + IOStatus io_s = sub_compact->builder->io_status(); + if (s.ok()) { + s = io_s; + } const uint64_t current_bytes = sub_compact->builder->FileSize(); if (s.ok()) { - // Add the checksum information to file metadata. - meta->file_checksum = sub_compact->builder->GetFileChecksum(); - meta->file_checksum_func_name = - sub_compact->builder->GetFileChecksumFuncName(); - meta->fd.file_size = current_bytes; + meta->marked_for_compaction = sub_compact->builder->NeedCompact(); + // With accurate smallest and largest key, we can get a slightly more + // accurate oldest ancester time. + // This makes oldest ancester time in manifest more accurate than in + // table properties. Not sure how to resolve it. 
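The comment above and the if block that follows belong together: once the output file's exact smallest and largest user keys are known, the minimum oldest ancestor time can be re-evaluated over just the overlapping input files, which can only tighten the value. A worked illustration with invented files (MinAncestorTime is a hypothetical stand-in for MinInputFileOldestAncesterTime):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Input { std::string smallest, largest; uint64_t oldest_ancester_time; };

    // Minimum ancestor time over inputs overlapping [lo, hi], rather than
    // over all inputs of the compaction.
    uint64_t MinAncestorTime(const std::vector<Input>& in, const std::string& lo,
                             const std::string& hi) {
      uint64_t min_time = UINT64_MAX;
      for (const auto& f : in)
        if (!(f.largest < lo || hi < f.smallest))  // overlaps [lo, hi]
          min_time = std::min(min_time, f.oldest_ancester_time);
      return min_time;
    }

    int main() {
      const std::vector<Input> in = {{"a", "f", 100}, {"g", "p", 900}};
      std::cout << MinAncestorTime(in, "a", "z") << "\n";  // 100: both overlap
      std::cout << MinAncestorTime(in, "h", "k") << "\n";  // 900: refined value
    }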
+ if (meta->smallest.size() > 0 && meta->largest.size() > 0) { + uint64_t refined_oldest_ancester_time; + Slice new_smallest = meta->smallest.user_key(); + Slice new_largest = meta->largest.user_key(); + if (!new_largest.empty() && !new_smallest.empty()) { + refined_oldest_ancester_time = + sub_compact->compaction->MinInputFileOldestAncesterTime( + &(meta->smallest), &(meta->largest)); + if (refined_oldest_ancester_time != port::kMaxUint64) { + meta->oldest_ancester_time = refined_oldest_ancester_time; + } + } + } } sub_compact->current_output()->finished = true; sub_compact->total_bytes += current_bytes; // Finish and check for file errors if (s.ok()) { - StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS); - s = sub_compact->outfile->Sync(db_options_.use_fsync); + StopWatch sw(db_options_.clock, stats_, COMPACTION_OUTFILE_SYNC_MICROS); + io_s = sub_compact->outfile->Sync(db_options_.use_fsync); + } + if (s.ok() && io_s.ok()) { + io_s = sub_compact->outfile->Close(); + } + if (s.ok() && io_s.ok()) { + // Add the checksum information to file metadata. + meta->file_checksum = sub_compact->outfile->GetFileChecksum(); + meta->file_checksum_func_name = + sub_compact->outfile->GetFileChecksumFuncName(); + file_checksum = meta->file_checksum; + file_checksum_func_name = meta->file_checksum_func_name; } if (s.ok()) { - s = sub_compact->outfile->Close(); + s = io_s; + } + if (sub_compact->io_status.ok()) { + sub_compact->io_status = io_s; + // Since this error is really a copy of the + // "normal" status, it does not also need to be checked + sub_compact->io_status.PermitUncheckedError(); } sub_compact->outfile.reset(); @@ -1326,9 +1908,20 @@ // This happens when the output level is bottom level, at the same time // the sub_compact output nothing. std::string fname = - TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths, + TableFileName(sub_compact->compaction->immutable_options()->cf_paths, meta->fd.GetNumber(), meta->fd.GetPathId()); - env_->DeleteFile(fname); + + // TODO(AR) it is not clear if there are any larger implications if + // DeleteFile fails here + Status ds = env_->DeleteFile(fname); + if (!ds.ok()) { + ROCKS_LOG_WARN( + db_options_.info_log, + "[%s] [JOB %d] Unable to remove SST file for table #%" PRIu64 + " at bottom level%s", + cfd->GetName().c_str(), job_id_, output_number, + meta->marked_for_compaction ? " (need compaction)" : ""); + } // Also need to remove the file from outputs, or it will be added to the // VersionEdit. 
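Related bookkeeping worth a note: GetCompactionId(), introduced by this diff a little earlier, derives a per-subcompaction id by packing the 32-bit job id into the high half of a 64-bit value and the sub-compaction id into the low half. Round-tripping the packing shows both components stay recoverable:

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint32_t job_id = 7, sub_job_id = 3;
      const uint64_t id = (uint64_t)job_id << 32 | sub_job_id;  // as in the hunk
      std::cout << std::hex << id << "\n";                      // 700000003
      std::cout << std::dec << (uint32_t)(id >> 32) << " "      // 7
                << (uint32_t)(id & 0xffffffff) << "\n";         // 3
    }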
@@ -1352,9 +1945,7 @@ FileDescriptor output_fd; uint64_t oldest_blob_file_number = kInvalidBlobFileNumber; if (meta != nullptr) { - fname = - TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths, - meta->fd.GetNumber(), meta->fd.GetPathId()); + fname = GetTableFileName(meta->fd.GetNumber()); output_fd = meta->fd; oldest_blob_file_number = meta->oldest_blob_file_number; } else { @@ -1363,14 +1954,18 @@ EventHelpers::LogAndNotifyTableFileCreationFinished( event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname, job_id_, output_fd, oldest_blob_file_number, tp, - TableFileCreationReason::kCompaction, s); + TableFileCreationReason::kCompaction, s, file_checksum, + file_checksum_func_name); #ifndef ROCKSDB_LITE // Report new file to SstFileManagerImpl auto sfm = static_cast(db_options_.sst_file_manager.get()); if (sfm && meta != nullptr && meta->fd.GetPathId() == 0) { - sfm->OnAddFile(fname); + Status add_s = sfm->OnAddFile(fname); + if (!add_s.ok() && s.ok()) { + s = add_s; + } if (sfm->IsMaxAllowedSpaceReached()) { // TODO(ajkr): should we return OK() if max space was reached by the final // compaction output file (similarly to how flush works when full)? @@ -1391,49 +1986,86 @@ Status CompactionJob::InstallCompactionResults( const MutableCFOptions& mutable_cf_options) { + assert(compact_); + db_mutex_->AssertHeld(); auto* compaction = compact_->compaction; - // paranoia: verify that the files that we started with - // still exist in the current version and in the same original level. - // This ensures that a concurrent compaction did not erroneously - // pick the same files to compact_. - if (!versions_->VerifyCompactionFileConsistency(compaction)) { - Compaction::InputLevelSummaryBuffer inputs_summary; - - ROCKS_LOG_ERROR(db_options_.info_log, "[%s] [JOB %d] Compaction %s aborted", - compaction->column_family_data()->GetName().c_str(), - job_id_, compaction->InputLevelSummary(&inputs_summary)); - return Status::Corruption("Compaction input files inconsistent"); - } + assert(compaction); { Compaction::InputLevelSummaryBuffer inputs_summary; - ROCKS_LOG_INFO( - db_options_.info_log, "[%s] [JOB %d] Compacted %s => %" PRIu64 " bytes", - compaction->column_family_data()->GetName().c_str(), job_id_, - compaction->InputLevelSummary(&inputs_summary), compact_->total_bytes); + ROCKS_LOG_INFO(db_options_.info_log, + "[%s] [JOB %d] Compacted %s => %" PRIu64 " bytes", + compaction->column_family_data()->GetName().c_str(), job_id_, + compaction->InputLevelSummary(&inputs_summary), + compact_->total_bytes + compact_->total_blob_bytes); } + VersionEdit* const edit = compaction->edit(); + assert(edit); + // Add compaction inputs - compaction->AddInputDeletions(compact_->compaction->edit()); + compaction->AddInputDeletions(edit); + + std::unordered_map blob_total_garbage; for (const auto& sub_compact : compact_->sub_compact_states) { for (const auto& out : sub_compact.outputs) { - compaction->edit()->AddFile(compaction->output_level(), out.meta); + edit->AddFile(compaction->output_level(), out.meta); + } + + for (const auto& blob : sub_compact.blob_file_additions) { + edit->AddBlobFile(blob); } + + if (sub_compact.blob_garbage_meter) { + const auto& flows = sub_compact.blob_garbage_meter->flows(); + + for (const auto& pair : flows) { + const uint64_t blob_file_number = pair.first; + const BlobGarbageMeter::BlobInOutFlow& flow = pair.second; + + assert(flow.IsValid()); + if (flow.HasGarbage()) { + blob_total_garbage[blob_file_number].Add(flow.GetGarbageCount(), + 
flow.GetGarbageBytes()); + } + } + } + } + + for (const auto& pair : blob_total_garbage) { + const uint64_t blob_file_number = pair.first; + const BlobGarbageMeter::BlobStats& stats = pair.second; + + edit->AddBlobFileGarbage(blob_file_number, stats.GetCount(), + stats.GetBytes()); + } + return versions_->LogAndApply(compaction->column_family_data(), - mutable_cf_options, compaction->edit(), - db_mutex_, db_directory_); + mutable_cf_options, edit, db_mutex_, + db_directory_); } void CompactionJob::RecordCompactionIOStats() { RecordTick(stats_, COMPACT_READ_BYTES, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES, IOSTATS(bytes_written)); + CompactionReason compaction_reason = + compact_->compaction->compaction_reason(); + if (compaction_reason == CompactionReason::kFilesMarkedForCompaction) { + RecordTick(stats_, COMPACT_READ_BYTES_MARKED, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES_MARKED, IOSTATS(bytes_written)); + } else if (compaction_reason == CompactionReason::kPeriodicCompaction) { + RecordTick(stats_, COMPACT_READ_BYTES_PERIODIC, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES_PERIODIC, IOSTATS(bytes_written)); + } else if (compaction_reason == CompactionReason::kTtl) { + RecordTick(stats_, COMPACT_READ_BYTES_TTL, IOSTATS(bytes_read)); + RecordTick(stats_, COMPACT_WRITE_BYTES_TTL, IOSTATS(bytes_written)); + } ThreadStatusUtil::IncreaseThreadOperationProperty( ThreadStatus::COMPACTION_BYTES_READ, IOSTATS(bytes_read)); IOSTATS_RESET(bytes_read); - RecordTick(stats_, COMPACT_WRITE_BYTES, IOSTATS(bytes_written)); ThreadStatusUtil::IncreaseThreadOperationProperty( ThreadStatus::COMPACTION_BYTES_WRITTEN, IOSTATS(bytes_written)); IOSTATS_RESET(bytes_written); @@ -1445,9 +2077,7 @@ assert(sub_compact->builder == nullptr); // no need to lock because VersionSet::next_file_number_ is atomic uint64_t file_number = versions_->NewFileNumber(); - std::string fname = - TableFileName(sub_compact->compaction->immutable_cf_options()->cf_paths, - file_number, sub_compact->compaction->output_path_id()); + std::string fname = GetTableFileName(file_number); // Fire events. ColumnFamilyData* cfd = sub_compact->compaction->column_family_data(); #ifndef ROCKSDB_LITE @@ -1462,7 +2092,25 @@ TEST_SYNC_POINT_CALLBACK("CompactionJob::OpenCompactionOutputFile", &syncpoint_arg); #endif - Status s = NewWritableFile(fs_, fname, &writable_file, file_options_); + + // Pass temperature of bottommost files to FileSystem. + FileOptions fo_copy = file_options_; + Temperature temperature = sub_compact->compaction->output_temperature(); + if (temperature == Temperature::kUnknown && bottommost_level_) { + temperature = + sub_compact->compaction->mutable_cf_options()->bottommost_temperature; + } + fo_copy.temperature = temperature; + + Status s; + IOStatus io_s = NewWritableFile(fs_.get(), fname, &writable_file, fo_copy); + s = io_s; + if (sub_compact->io_status.ok()) { + sub_compact->io_status = io_s; + // Since this error is really a copy of the io_s that is checked below as s, + // it does not also need to be checked.
+ sub_compact->io_status.PermitUncheckedError(); + } if (!s.ok()) { ROCKS_LOG_ERROR( db_options_.info_log, @@ -1474,13 +2122,14 @@ EventHelpers::LogAndNotifyTableFileCreationFinished( event_logger_, cfd->ioptions()->listeners, dbname_, cfd->GetName(), fname, job_id_, FileDescriptor(), kInvalidBlobFileNumber, - TableProperties(), TableFileCreationReason::kCompaction, s); + TableProperties(), TableFileCreationReason::kCompaction, s, + kUnknownFileChecksum, kUnknownFileChecksumFuncName); return s; } // Try to figure out the output file's oldest ancester time. int64_t temp_current_time = 0; - auto get_time_status = env_->GetCurrentTime(&temp_current_time); + auto get_time_status = db_options_.clock->GetCurrentTime(&temp_current_time); // Safe to proceed even if GetCurrentTime fails. So, log and proceed. if (!get_time_status.ok()) { ROCKS_LOG_WARN(db_options_.info_log, @@ -1488,50 +2137,62 @@ get_time_status.ToString().c_str()); } uint64_t current_time = static_cast(temp_current_time); + InternalKey tmp_start, tmp_end; + if (sub_compact->start != nullptr) { + tmp_start.SetMinPossibleForUserKey(*(sub_compact->start)); + } + if (sub_compact->end != nullptr) { + tmp_end.SetMinPossibleForUserKey(*(sub_compact->end)); + } uint64_t oldest_ancester_time = - sub_compact->compaction->MinInputFileOldestAncesterTime(); + sub_compact->compaction->MinInputFileOldestAncesterTime( + (sub_compact->start != nullptr) ? &tmp_start : nullptr, + (sub_compact->end != nullptr) ? &tmp_end : nullptr); if (oldest_ancester_time == port::kMaxUint64) { oldest_ancester_time = current_time; } // Initialize a SubcompactionState::Output and add it to sub_compact->outputs { - SubcompactionState::Output out; - out.meta.fd = FileDescriptor(file_number, - sub_compact->compaction->output_path_id(), 0); - out.meta.oldest_ancester_time = oldest_ancester_time; - out.meta.file_creation_time = current_time; - out.finished = false; - sub_compact->outputs.push_back(out); + FileMetaData meta; + meta.fd = FileDescriptor(file_number, + sub_compact->compaction->output_path_id(), 0); + meta.oldest_ancester_time = oldest_ancester_time; + meta.file_creation_time = current_time; + meta.temperature = temperature; + sub_compact->outputs.emplace_back( + std::move(meta), cfd->internal_comparator(), + /*enable_order_check=*/ + sub_compact->compaction->mutable_cf_options() + ->check_flush_compaction_key_order, + /*enable_hash=*/paranoid_file_checks_); } writable_file->SetIOPriority(Env::IOPriority::IO_LOW); writable_file->SetWriteLifeTimeHint(write_hint_); + FileTypeSet tmp_set = db_options_.checksum_handoff_file_types; writable_file->SetPreallocationBlockSize(static_cast( sub_compact->compaction->OutputFilePreallocationSize())); const auto& listeners = - sub_compact->compaction->immutable_cf_options()->listeners; - sub_compact->outfile.reset( - new WritableFileWriter(std::move(writable_file), fname, file_options_, - env_, db_options_.statistics.get(), listeners, - db_options_.sst_file_checksum_func.get())); - - // If the Column family flag is to only optimize filters for hits, - // we can skip creating filters if this is the bottommost_level where - // data is going to be found - bool skip_filters = - cfd->ioptions()->optimize_filters_for_hits && bottommost_level_; + sub_compact->compaction->immutable_options()->listeners; + sub_compact->outfile.reset(new WritableFileWriter( + std::move(writable_file), fname, file_options_, db_options_.clock, + io_tracer_, db_options_.stats, listeners, + db_options_.file_checksum_gen_factory.get(), + 
   writable_file->SetIOPriority(Env::IOPriority::IO_LOW);
   writable_file->SetWriteLifeTimeHint(write_hint_);
+  FileTypeSet tmp_set = db_options_.checksum_handoff_file_types;
   writable_file->SetPreallocationBlockSize(static_cast<size_t>(
       sub_compact->compaction->OutputFilePreallocationSize()));
   const auto& listeners =
-      sub_compact->compaction->immutable_cf_options()->listeners;
-  sub_compact->outfile.reset(
-      new WritableFileWriter(std::move(writable_file), fname, file_options_,
-                             env_, db_options_.statistics.get(), listeners,
-                             db_options_.sst_file_checksum_func.get()));
-
-  // If the Column family flag is to only optimize filters for hits,
-  // we can skip creating filters if this is the bottommost_level where
-  // data is going to be found
-  bool skip_filters =
-      cfd->ioptions()->optimize_filters_for_hits && bottommost_level_;
+      sub_compact->compaction->immutable_options()->listeners;
+  sub_compact->outfile.reset(new WritableFileWriter(
+      std::move(writable_file), fname, file_options_, db_options_.clock,
+      io_tracer_, db_options_.stats, listeners,
+      db_options_.file_checksum_gen_factory.get(),
+      tmp_set.Contains(FileType::kTableFile), false));
 
-  sub_compact->builder.reset(NewTableBuilder(
+  TableBuilderOptions tboptions(
       *cfd->ioptions(), *(sub_compact->compaction->mutable_cf_options()),
       cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(),
-      cfd->GetID(), cfd->GetName(), sub_compact->outfile.get(),
       sub_compact->compaction->output_compression(),
-      0 /*sample_for_compression */,
-      sub_compact->compaction->output_compression_opts(),
-      sub_compact->compaction->output_level(), skip_filters,
-      oldest_ancester_time, 0 /* oldest_key_time */,
-      sub_compact->compaction->max_output_file_size(), current_time));
+      sub_compact->compaction->output_compression_opts(), cfd->GetID(),
+      cfd->GetName(), sub_compact->compaction->output_level(),
+      bottommost_level_, TableFileCreationReason::kCompaction,
+      oldest_ancester_time, 0 /* oldest_key_time */, current_time, db_id_,
+      db_session_id_, sub_compact->compaction->max_output_file_size(),
+      file_number);
+  sub_compact->builder.reset(
+      NewTableBuilder(tboptions, sub_compact->outfile.get()));
   LogFlush(db_options_.info_log);
   return s;
 }
@@ -1554,6 +2215,9 @@
       TableCache::Evict(table_cache_.get(), out.meta.fd.GetNumber());
     }
   }
+    // TODO: sub_compact.io_status is not checked like status. Not sure if
+    // that's intentional. So ignoring the io_status as of now.
+    sub_compact.io_status.PermitUncheckedError();
   }
   delete compact_;
   compact_ = nullptr;
@@ -1571,6 +2235,8 @@
 #endif  // !ROCKSDB_LITE
 
 void CompactionJob::UpdateCompactionStats() {
+  assert(compact_);
+
   Compaction* compaction = compact_->compaction;
   compaction_stats_.num_input_files_in_non_output_levels = 0;
   compaction_stats_.num_input_files_in_output_level = 0;
@@ -1588,27 +2254,20 @@
     }
   }
 
-  uint64_t num_output_records = 0;
-
-  for (const auto& sub_compact : compact_->sub_compact_states) {
-    size_t num_output_files = sub_compact.outputs.size();
-    if (sub_compact.builder != nullptr) {
-      // An error occurred so ignore the last output.
-      assert(num_output_files > 0);
-      --num_output_files;
-    }
-    compaction_stats_.num_output_files += static_cast<int>(num_output_files);
-
-    num_output_records += sub_compact.num_output_records;
-
-    for (const auto& out : sub_compact.outputs) {
-      compaction_stats_.bytes_written += out.meta.fd.file_size;
-    }
-  }
+  assert(compaction_job_stats_);
+  compaction_stats_.bytes_read_blob =
+      compaction_job_stats_->total_blob_bytes_read;
+
+  compaction_stats_.num_output_files =
+      static_cast<int>(compact_->num_output_files);
+  compaction_stats_.num_output_files_blob =
+      static_cast<int>(compact_->num_blob_output_files);
+  compaction_stats_.bytes_written = compact_->total_bytes;
+  compaction_stats_.bytes_written_blob = compact_->total_blob_bytes;
 
-  if (compaction_stats_.num_input_records > num_output_records) {
+  if (compaction_stats_.num_input_records > compact_->num_output_records) {
     compaction_stats_.num_dropped_records =
-        compaction_stats_.num_input_records - num_output_records;
+        compaction_stats_.num_input_records - compact_->num_output_records;
   }
 }
 
@@ -1630,32 +2289,31 @@
 void CompactionJob::UpdateCompactionJobStats(
     const InternalStats::CompactionStats& stats) const {
 #ifndef ROCKSDB_LITE
-  if (compaction_job_stats_) {
-    compaction_job_stats_->elapsed_micros = stats.micros;
+  compaction_job_stats_->elapsed_micros = stats.micros;
 
-    // input information
-    compaction_job_stats_->total_input_bytes =
-        stats.bytes_read_non_output_levels + stats.bytes_read_output_level;
-    compaction_job_stats_->num_input_records = stats.num_input_records;
-    compaction_job_stats_->num_input_files =
-        stats.num_input_files_in_non_output_levels +
-        stats.num_input_files_in_output_level;
-    compaction_job_stats_->num_input_files_at_output_level =
-        stats.num_input_files_in_output_level;
-
-    // output information
-    compaction_job_stats_->total_output_bytes = stats.bytes_written;
-    compaction_job_stats_->num_output_records = compact_->num_output_records;
-    compaction_job_stats_->num_output_files = stats.num_output_files;
-
-    if (compact_->NumOutputFiles() > 0U) {
-      CopyPrefix(compact_->SmallestUserKey(),
-                 CompactionJobStats::kMaxPrefixLength,
-                 &compaction_job_stats_->smallest_output_key_prefix);
-      CopyPrefix(compact_->LargestUserKey(),
-                 CompactionJobStats::kMaxPrefixLength,
-                 &compaction_job_stats_->largest_output_key_prefix);
-    }
+  // input information
+  compaction_job_stats_->total_input_bytes =
+      stats.bytes_read_non_output_levels + stats.bytes_read_output_level;
+  compaction_job_stats_->num_input_records = stats.num_input_records;
+  compaction_job_stats_->num_input_files =
+      stats.num_input_files_in_non_output_levels +
+      stats.num_input_files_in_output_level;
+  compaction_job_stats_->num_input_files_at_output_level =
+      stats.num_input_files_in_output_level;
+
+  // output information
+  compaction_job_stats_->total_output_bytes = stats.bytes_written;
+  compaction_job_stats_->total_output_bytes_blob = stats.bytes_written_blob;
+  compaction_job_stats_->num_output_records = compact_->num_output_records;
+  compaction_job_stats_->num_output_files = stats.num_output_files;
+  compaction_job_stats_->num_output_files_blob = stats.num_output_files_blob;
+
+  if (stats.num_output_files > 0) {
+    CopyPrefix(compact_->SmallestUserKey(),
+               CompactionJobStats::kMaxPrefixLength,
+               &compaction_job_stats_->smallest_output_key_prefix);
+    CopyPrefix(compact_->LargestUserKey(), CompactionJobStats::kMaxPrefixLength,
+               &compaction_job_stats_->largest_output_key_prefix);
   }
 #else
   (void)stats;
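The CompactionJobStats filled in by UpdateCompactionJobStats() surface to applications through the listener API. A small sketch of consuming them; the listener class name is ours, while the callback signature is the standard public one:

#include <cstdio>

#include "rocksdb/db.h"
#include "rocksdb/listener.h"

class CompactionStatsListener : public rocksdb::EventListener {
 public:
  void OnCompactionCompleted(rocksdb::DB* /*db*/,
                             const rocksdb::CompactionJobInfo& info) override {
    // info.stats is the CompactionJobStats populated by the code above.
    std::printf("compaction: %llu in -> %llu out records, %llu output files\n",
                static_cast<unsigned long long>(info.stats.num_input_records),
                static_cast<unsigned long long>(info.stats.num_output_records),
                static_cast<unsigned long long>(info.stats.num_output_files));
  }
};

// Registration (illustrative):
//   options.listeners.push_back(std::make_shared<CompactionStatsListener>());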
@@ -1697,4 +2355,629 @@
   }
 }
 
+std::string CompactionJob::GetTableFileName(uint64_t file_number) {
+  return TableFileName(compact_->compaction->immutable_options()->cf_paths,
+                       file_number, compact_->compaction->output_path_id());
+}
+
+#ifndef ROCKSDB_LITE
+std::string CompactionServiceCompactionJob::GetTableFileName(
+    uint64_t file_number) {
+  return MakeTableFileName(output_path_, file_number);
+}
+
+void CompactionServiceCompactionJob::RecordCompactionIOStats() {
+  compaction_result_->bytes_read += IOSTATS(bytes_read);
+  compaction_result_->bytes_written += IOSTATS(bytes_written);
+  CompactionJob::RecordCompactionIOStats();
+}
+
+CompactionServiceCompactionJob::CompactionServiceCompactionJob(
+    int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
+    const MutableDBOptions& mutable_db_options, const FileOptions& file_options,
+    VersionSet* versions, const std::atomic<bool>* shutting_down,
+    LogBuffer* log_buffer, FSDirectory* output_directory, Statistics* stats,
+    InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
+    std::vector<SequenceNumber> existing_snapshots,
+    std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
+    const std::string& dbname, const std::shared_ptr<IOTracer>& io_tracer,
+    const std::string& db_id, const std::string& db_session_id,
+    const std::string& output_path,
+    const CompactionServiceInput& compaction_service_input,
+    CompactionServiceResult* compaction_service_result)
+    : CompactionJob(
+          job_id, compaction, db_options, mutable_db_options, file_options,
+          versions, shutting_down, 0, log_buffer, nullptr, output_directory,
+          nullptr, stats, db_mutex, db_error_handler, existing_snapshots,
+          kMaxSequenceNumber, nullptr, table_cache, event_logger,
+          compaction->mutable_cf_options()->paranoid_file_checks,
+          compaction->mutable_cf_options()->report_bg_io_stats, dbname,
+          &(compaction_service_result->stats), Env::Priority::USER, io_tracer,
+          nullptr, nullptr, db_id, db_session_id,
+          compaction->column_family_data()->GetFullHistoryTsLow()),
+      output_path_(output_path),
+      compaction_input_(compaction_service_input),
+      compaction_result_(compaction_service_result) {}
+
+Status CompactionServiceCompactionJob::Run() {
+  AutoThreadOperationStageUpdater stage_updater(
+      ThreadStatus::STAGE_COMPACTION_RUN);
+
+  auto* c = compact_->compaction;
+  assert(c->column_family_data() != nullptr);
+  assert(c->column_family_data()->current()->storage_info()->NumLevelFiles(
+             compact_->compaction->level()) > 0);
+
+  write_hint_ =
+      c->column_family_data()->CalculateSSTWriteHint(c->output_level());
+  bottommost_level_ = c->bottommost_level();
+
+  Slice begin = compaction_input_.begin;
+  Slice end = compaction_input_.end;
+  compact_->sub_compact_states.emplace_back(
+      c, compaction_input_.has_begin ? &begin : nullptr,
+      compaction_input_.has_end ? &end : nullptr,
+      compaction_input_.approx_size,
+      /*sub_job_id*/ 0);
+
+  log_buffer_->FlushBufferToLog();
+  LogCompaction();
+  const uint64_t start_micros = db_options_.clock->NowMicros();
+  // Pick the only sub-compaction we should have
+  assert(compact_->sub_compact_states.size() == 1);
+  SubcompactionState* sub_compact = compact_->sub_compact_states.data();
+
+  ProcessKeyValueCompaction(sub_compact);
+
+  compaction_stats_.micros = db_options_.clock->NowMicros() - start_micros;
+  compaction_stats_.cpu_micros = sub_compact->compaction_job_stats.cpu_micros;
+
+  RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros);
+  RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
+                        compaction_stats_.cpu_micros);
+
+  Status status = sub_compact->status;
+  IOStatus io_s = sub_compact->io_status;
+
+  if (io_status_.ok()) {
+    io_status_ = io_s;
+  }
+
+  if (status.ok()) {
+    constexpr IODebugContext* dbg = nullptr;
+
+    if (output_directory_) {
+      io_s = output_directory_->FsyncWithDirOptions(IOOptions(), dbg,
+                                                    DirFsyncOptions());
+    }
+  }
+  if (io_status_.ok()) {
+    io_status_ = io_s;
+  }
+  if (status.ok()) {
+    status = io_s;
+  }
+  if (status.ok()) {
+    // TODO: Add verify_table()
+  }
+
+  // Finish up all book-keeping to unify the subcompaction results
+  AggregateStatistics();
+  UpdateCompactionStats();
+  RecordCompactionIOStats();
+
+  LogFlush(db_options_.info_log);
+  compact_->status = status;
+  compact_->status.PermitUncheckedError();
+
+  // Build compaction result
+  compaction_result_->output_level = compact_->compaction->output_level();
+  compaction_result_->output_path = output_path_;
+  for (const auto& output_file : sub_compact->outputs) {
+    auto& meta = output_file.meta;
+    compaction_result_->output_files.emplace_back(
+        MakeTableFileName(meta.fd.GetNumber()), meta.fd.smallest_seqno,
+        meta.fd.largest_seqno, meta.smallest.Encode().ToString(),
+        meta.largest.Encode().ToString(), meta.oldest_ancester_time,
+        meta.file_creation_time, output_file.validator.GetHash(),
+        meta.marked_for_compaction);
+  }
+  compaction_result_->num_output_records = sub_compact->num_output_records;
+  compaction_result_->total_bytes = sub_compact->total_bytes;
+
+  return status;
+}
+
+void CompactionServiceCompactionJob::CleanupCompaction() {
+  CompactionJob::CleanupCompaction();
+}
+
+// Internal binary format for the input and result data
+enum BinaryFormatVersion : uint32_t {
+  kOptionsString = 1,  // Use string format similar to Option string format
+};
+
+// offset_of is used to get the offset of a class data member
+// ex: offset_of(&ColumnFamilyDescriptor::options)
+// This call will return the offset of options in ColumnFamilyDescriptor class
+//
+// This is the same as offsetof() but allows us to work with non
+// standard-layout classes and structures
+// refs:
+// http://en.cppreference.com/w/cpp/concept/StandardLayoutType
+// https://gist.github.com/graphitemaster/494f21190bb2c63c5516
+static ColumnFamilyDescriptor dummy_cfd("", ColumnFamilyOptions());
+template <typename T1>
+int offset_of(T1 ColumnFamilyDescriptor::*member) {
+  return int(size_t(&(dummy_cfd.*member)) - size_t(&dummy_cfd));
+}
+
+static CompactionServiceInput dummy_cs_input;
+template <typename T1>
+int offset_of(T1 CompactionServiceInput::*member) {
+  return int(size_t(&(dummy_cs_input.*member)) - size_t(&dummy_cs_input));
+}
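A toy illustration of the offset_of pattern above on a hypothetical struct (all names are ours): a static dummy object and a pointer-to-member yield field offsets even for non-standard-layout types, where offsetof() is formally unsupported:

#include <cstddef>
#include <cstdio>
#include <string>

// Hypothetical non-standard-layout type, analogous to ColumnFamilyDescriptor.
struct Descriptor {
  std::string name;  // non-trivial member makes this non-standard-layout
  int level = 0;
};

static Descriptor dummy_desc;

// Same pattern as offset_of above: subtract the dummy object's address from
// the address of one of its members.
template <typename T>
int offset_of(T Descriptor::*member) {
  return int(size_t(&(dummy_desc.*member)) - size_t(&dummy_desc));
}

int main() {
  std::printf("offset of level: %d\n", offset_of(&Descriptor::level));
  return 0;
}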
+static std::unordered_map<std::string, OptionTypeInfo> cfd_type_info = {
+    {"name",
+     {offset_of(&ColumnFamilyDescriptor::name), OptionType::kEncodedString,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"options",
+     {offset_of(&ColumnFamilyDescriptor::options), OptionType::kConfigurable,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const std::string& value, void* addr) {
+        auto cf_options = static_cast<ColumnFamilyOptions*>(addr);
+        return GetColumnFamilyOptionsFromString(opts, ColumnFamilyOptions(),
+                                                value, cf_options);
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr, std::string* value) {
+        const auto cf_options = static_cast<const ColumnFamilyOptions*>(addr);
+        std::string result;
+        auto status =
+            GetStringFromColumnFamilyOptions(opts, *cf_options, &result);
+        *value = "{" + result + "}";
+        return status;
+      },
+      [](const ConfigOptions& opts, const std::string& name, const void* addr1,
+         const void* addr2, std::string* mismatch) {
+        const auto this_one = static_cast<const ColumnFamilyOptions*>(addr1);
+        const auto that_one = static_cast<const ColumnFamilyOptions*>(addr2);
+        auto this_conf = CFOptionsAsConfigurable(*this_one);
+        auto that_conf = CFOptionsAsConfigurable(*that_one);
+        std::string mismatch_opt;
+        bool result =
+            this_conf->AreEquivalent(opts, that_conf.get(), &mismatch_opt);
+        if (!result) {
+          *mismatch = name + "." + mismatch_opt;
+        }
+        return result;
+      }}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> cs_input_type_info = {
+    {"column_family",
+     OptionTypeInfo::Struct("column_family", &cfd_type_info,
+                            offset_of(&CompactionServiceInput::column_family),
+                            OptionVerificationType::kNormal,
+                            OptionTypeFlags::kNone)},
+    {"db_options",
+     {offset_of(&CompactionServiceInput::db_options),
+      OptionType::kConfigurable, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone,
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const std::string& value, void* addr) {
+        auto options = static_cast<DBOptions*>(addr);
+        return GetDBOptionsFromString(opts, DBOptions(), value, options);
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr, std::string* value) {
+        const auto options = static_cast<const DBOptions*>(addr);
+        std::string result;
+        auto status = GetStringFromDBOptions(opts, *options, &result);
+        *value = "{" + result + "}";
+        return status;
+      },
+      [](const ConfigOptions& opts, const std::string& name, const void* addr1,
+         const void* addr2, std::string* mismatch) {
+        const auto this_one = static_cast<const DBOptions*>(addr1);
+        const auto that_one = static_cast<const DBOptions*>(addr2);
+        auto this_conf = DBOptionsAsConfigurable(*this_one);
+        auto that_conf = DBOptionsAsConfigurable(*that_one);
+        std::string mismatch_opt;
+        bool result =
+            this_conf->AreEquivalent(opts, that_conf.get(), &mismatch_opt);
+        if (!result) {
+          *mismatch = name + "." + mismatch_opt;
+        }
+        return result;
+      }}},
+    {"snapshots", OptionTypeInfo::Vector<uint64_t>(
+                      offset_of(&CompactionServiceInput::snapshots),
+                      OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+                      {0, OptionType::kUInt64T})},
+    {"input_files", OptionTypeInfo::Vector<std::string>(
+                        offset_of(&CompactionServiceInput::input_files),
+                        OptionVerificationType::kNormal,
+                        OptionTypeFlags::kNone,
+                        {0, OptionType::kEncodedString})},
+    {"output_level",
+     {offset_of(&CompactionServiceInput::output_level), OptionType::kInt,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"has_begin",
+     {offset_of(&CompactionServiceInput::has_begin), OptionType::kBoolean,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"begin",
+     {offset_of(&CompactionServiceInput::begin), OptionType::kEncodedString,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"has_end",
+     {offset_of(&CompactionServiceInput::has_end), OptionType::kBoolean,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"end",
+     {offset_of(&CompactionServiceInput::end), OptionType::kEncodedString,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"approx_size",
+     {offset_of(&CompactionServiceInput::approx_size), OptionType::kUInt64T,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    cs_output_file_type_info = {
+        {"file_name",
+         {offsetof(struct CompactionServiceOutputFile, file_name),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"smallest_seqno",
+         {offsetof(struct CompactionServiceOutputFile, smallest_seqno),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"largest_seqno",
+         {offsetof(struct CompactionServiceOutputFile, largest_seqno),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"smallest_internal_key",
+         {offsetof(struct CompactionServiceOutputFile, smallest_internal_key),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"largest_internal_key",
+         {offsetof(struct CompactionServiceOutputFile, largest_internal_key),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"oldest_ancester_time",
+         {offsetof(struct CompactionServiceOutputFile, oldest_ancester_time),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"file_creation_time",
+         {offsetof(struct CompactionServiceOutputFile, file_creation_time),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"paranoid_hash",
+         {offsetof(struct CompactionServiceOutputFile, paranoid_hash),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"marked_for_compaction",
+         {offsetof(struct CompactionServiceOutputFile, marked_for_compaction),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+};
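The maps above drive serialization from a field name to a typed byte offset. A from-scratch miniature of the same table-driven idea (types and field names are ours, not RocksDB's), producing the "name=value;" shape the InputSerialization test later in this diff relies on:

#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Toy version of the pattern: each field registers a name plus a writer.
struct Input {
  int output_level = 0;
  bool has_begin = false;
};

using FieldWriter = std::function<std::string(const Input&)>;

static const std::vector<std::pair<std::string, FieldWriter>> kFields = {
    {"output_level",
     [](const Input& in) { return std::to_string(in.output_level); }},
    {"has_begin", [](const Input& in) { return in.has_begin ? "1" : "0"; }},
};

std::string Serialize(const Input& in) {
  std::string out;
  for (const auto& f : kFields) {
    out += f.first + "=" + f.second(in) + ";";
  }
  return out;
}

int main() {
  Input in;
  in.output_level = 4;
  std::printf("%s\n", Serialize(in).c_str());  // "output_level=4;has_begin=0;"
  return 0;
}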
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    compaction_job_stats_type_info = {
+        {"elapsed_micros",
+         {offsetof(struct CompactionJobStats, elapsed_micros),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"cpu_micros",
+         {offsetof(struct CompactionJobStats, cpu_micros),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_input_records",
+         {offsetof(struct CompactionJobStats, num_input_records),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_blobs_read",
+         {offsetof(struct CompactionJobStats, num_blobs_read),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_input_files",
+         {offsetof(struct CompactionJobStats, num_input_files),
+          OptionType::kSizeT, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_input_files_at_output_level",
+         {offsetof(struct CompactionJobStats, num_input_files_at_output_level),
+          OptionType::kSizeT, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_output_records",
+         {offsetof(struct CompactionJobStats, num_output_records),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_output_files",
+         {offsetof(struct CompactionJobStats, num_output_files),
+          OptionType::kSizeT, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_output_files_blob",
+         {offsetof(struct CompactionJobStats, num_output_files_blob),
+          OptionType::kSizeT, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"is_full_compaction",
+         {offsetof(struct CompactionJobStats, is_full_compaction),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"is_manual_compaction",
+         {offsetof(struct CompactionJobStats, is_manual_compaction),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"total_input_bytes",
+         {offsetof(struct CompactionJobStats, total_input_bytes),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"total_blob_bytes_read",
+         {offsetof(struct CompactionJobStats, total_blob_bytes_read),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"total_output_bytes",
+         {offsetof(struct CompactionJobStats, total_output_bytes),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"total_output_bytes_blob",
+         {offsetof(struct CompactionJobStats, total_output_bytes_blob),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_records_replaced",
+         {offsetof(struct CompactionJobStats, num_records_replaced),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"total_input_raw_key_bytes",
+         {offsetof(struct CompactionJobStats, total_input_raw_key_bytes),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"total_input_raw_value_bytes",
+         {offsetof(struct CompactionJobStats, total_input_raw_value_bytes),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_input_deletion_records",
+         {offsetof(struct CompactionJobStats, num_input_deletion_records),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_expired_deletion_records",
+         {offsetof(struct CompactionJobStats, num_expired_deletion_records),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_corrupt_keys",
+         {offsetof(struct CompactionJobStats, num_corrupt_keys),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"file_write_nanos",
+         {offsetof(struct CompactionJobStats, file_write_nanos),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"file_range_sync_nanos",
+         {offsetof(struct CompactionJobStats, file_range_sync_nanos),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"file_fsync_nanos",
+         {offsetof(struct CompactionJobStats, file_fsync_nanos),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"file_prepare_write_nanos",
+         {offsetof(struct CompactionJobStats, file_prepare_write_nanos),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"smallest_output_key_prefix",
+         {offsetof(struct CompactionJobStats, smallest_output_key_prefix),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"largest_output_key_prefix",
+         {offsetof(struct CompactionJobStats, largest_output_key_prefix),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_single_del_fallthru",
+         {offsetof(struct CompactionJobStats, num_single_del_fallthru),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"num_single_del_mismatch",
+         {offsetof(struct CompactionJobStats, num_single_del_mismatch),
+          OptionType::kUInt64T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+};
+
+namespace {
+// this is a helper struct to serialize and deserialize class Status, because
+// Status's members are not public.
+struct StatusSerializationAdapter {
+  uint8_t code;
+  uint8_t subcode;
+  uint8_t severity;
+  std::string message;
+
+  StatusSerializationAdapter() {}
+  explicit StatusSerializationAdapter(const Status& s) {
+    code = s.code();
+    subcode = s.subcode();
+    severity = s.severity();
+    auto msg = s.getState();
+    message = msg ? msg : "";
+  }
+
+  Status GetStatus() {
+    return Status(static_cast<Status::Code>(code),
+                  static_cast<Status::SubCode>(subcode),
+                  static_cast<Status::Severity>(severity), message);
+  }
+};
+}  // namespace
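Because Status's fields are private, the adapter above flattens them into public members before the generic, offset-based serializer runs. A self-contained miniature of that round trip (MiniStatus/MiniAdapter are stand-ins we made up, not RocksDB types):

#include <cassert>
#include <cstdint>
#include <string>

// Stand-in for rocksdb::Status, so the sketch compiles on its own; the real
// adapter does the same field-by-field capture on rocksdb::Status.
struct MiniStatus {
  uint8_t code = 0;  // 0 == ok
  std::string message;
};

struct MiniAdapter {
  uint8_t code;
  std::string message;

  explicit MiniAdapter(const MiniStatus& s) : code(s.code), message(s.message) {}
  MiniStatus Get() const { return MiniStatus{code, message}; }
};

int main() {
  MiniStatus corrupt{2, "bad block"};
  MiniAdapter a(corrupt);           // serialize-side capture
  MiniStatus round_trip = a.Get();  // deserialize-side rebuild
  assert(round_trip.code == corrupt.code && round_trip.message == "bad block");
  return 0;
}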
+
+static std::unordered_map<std::string, OptionTypeInfo>
+    status_adapter_type_info = {
+        {"code",
+         {offsetof(struct StatusSerializationAdapter, code),
+          OptionType::kUInt8T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"subcode",
+         {offsetof(struct StatusSerializationAdapter, subcode),
+          OptionType::kUInt8T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"severity",
+         {offsetof(struct StatusSerializationAdapter, severity),
+          OptionType::kUInt8T, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+        {"message",
+         {offsetof(struct StatusSerializationAdapter, message),
+          OptionType::kEncodedString, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
+};
+
+static std::unordered_map<std::string, OptionTypeInfo> cs_result_type_info = {
+    {"status",
+     {offsetof(struct CompactionServiceResult, status),
+      OptionType::kCustomizable, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone,
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const std::string& value, void* addr) {
+        auto status_obj = static_cast<Status*>(addr);
+        StatusSerializationAdapter adapter;
+        Status s = OptionTypeInfo::ParseType(
+            opts, value, status_adapter_type_info, &adapter);
+        *status_obj = adapter.GetStatus();
+        return s;
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr, std::string* value) {
+        const auto status_obj = static_cast<const Status*>(addr);
+        StatusSerializationAdapter adapter(*status_obj);
+        std::string result;
+        Status s = OptionTypeInfo::SerializeType(
+            opts, status_adapter_type_info, &adapter, &result);
+        *value = "{" + result + "}";
+        return s;
+      },
+      [](const ConfigOptions& opts, const std::string& /*name*/,
+         const void* addr1, const void* addr2, std::string* mismatch) {
+        const auto status1 = static_cast<const Status*>(addr1);
+        const auto status2 = static_cast<const Status*>(addr2);
+        StatusSerializationAdapter adapter1(*status1);
+        StatusSerializationAdapter adapter2(*status2);
+        return OptionTypeInfo::TypesAreEqual(opts, status_adapter_type_info,
+                                             &adapter1, &adapter2, mismatch);
+      }}},
+    {"output_files",
+     OptionTypeInfo::Vector<CompactionServiceOutputFile>(
+         offsetof(struct CompactionServiceResult, output_files),
+         OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+         OptionTypeInfo::Struct("output_files", &cs_output_file_type_info, 0,
+                                OptionVerificationType::kNormal,
+                                OptionTypeFlags::kNone))},
+    {"output_level",
+     {offsetof(struct CompactionServiceResult, output_level), OptionType::kInt,
+      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"output_path",
+     {offsetof(struct CompactionServiceResult, output_path),
+      OptionType::kEncodedString, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"num_output_records",
+     {offsetof(struct CompactionServiceResult, num_output_records),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"total_bytes",
+     {offsetof(struct CompactionServiceResult, total_bytes),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"bytes_read",
+     {offsetof(struct CompactionServiceResult, bytes_read),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"bytes_written",
+     {offsetof(struct CompactionServiceResult, bytes_written),
+      OptionType::kUInt64T, OptionVerificationType::kNormal,
+      OptionTypeFlags::kNone}},
+    {"stats", OptionTypeInfo::Struct(
+                  "stats", &compaction_job_stats_type_info,
+                  offsetof(struct CompactionServiceResult, stats),
+                  OptionVerificationType::kNormal, OptionTypeFlags::kNone)},
+};
+
+Status CompactionServiceInput::Read(const std::string& data_str,
+                                    CompactionServiceInput* obj) {
+  if (data_str.size() <= sizeof(BinaryFormatVersion)) {
+    return Status::InvalidArgument("Invalid CompactionServiceInput string");
+  }
+  auto format_version = DecodeFixed32(data_str.data());
+  if (format_version == kOptionsString) {
+    ConfigOptions cf;
+    cf.invoke_prepare_options = false;
+    cf.ignore_unknown_options = true;
+    return OptionTypeInfo::ParseType(
+        cf, data_str.substr(sizeof(BinaryFormatVersion)), cs_input_type_info,
+        obj);
+  } else {
+    return Status::NotSupported(
+        "Compaction Service Input data version not supported: " +
+        ToString(format_version));
+  }
+}
+
+Status CompactionServiceInput::Write(std::string* output) {
+  char buf[sizeof(BinaryFormatVersion)];
+  EncodeFixed32(buf, kOptionsString);
+  output->append(buf, sizeof(BinaryFormatVersion));
+  ConfigOptions cf;
+  cf.invoke_prepare_options = false;
+  return OptionTypeInfo::SerializeType(cf, cs_input_type_info, this, output);
+}
+
+Status CompactionServiceResult::Read(const std::string& data_str,
+                                     CompactionServiceResult* obj) {
+  if (data_str.size() <= sizeof(BinaryFormatVersion)) {
+    return Status::InvalidArgument("Invalid CompactionServiceResult string");
+  }
+  auto format_version = DecodeFixed32(data_str.data());
+  if (format_version == kOptionsString) {
+    ConfigOptions cf;
+    cf.invoke_prepare_options = false;
+    cf.ignore_unknown_options = true;
+    return OptionTypeInfo::ParseType(
+        cf, data_str.substr(sizeof(BinaryFormatVersion)), cs_result_type_info,
+        obj);
+  } else {
+    return Status::NotSupported(
+        "Compaction Service Result data version not supported: " +
+        ToString(format_version));
+  }
+}
+
+Status CompactionServiceResult::Write(std::string* output) {
+  char buf[sizeof(BinaryFormatVersion)];
+  EncodeFixed32(buf, kOptionsString);
+  output->append(buf, sizeof(BinaryFormatVersion));
+  ConfigOptions cf;
+  cf.invoke_prepare_options = false;
+  return OptionTypeInfo::SerializeType(cf, cs_result_type_info, this, output);
+}
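The resulting wire format is a fixed 4-byte version prefix followed by the option string. A sketch of a matching pack/unpack pair under that assumption (a little-endian host is assumed in place of EncodeFixed32/DecodeFixed32; names are ours):

#include <cstdint>
#include <cstring>
#include <string>

// Fixed32 little-endian prefix, mirroring EncodeFixed32/DecodeFixed32 above.
std::string Pack(uint32_t version, const std::string& options_string) {
  char buf[sizeof(uint32_t)];
  std::memcpy(buf, &version, sizeof(buf));  // assumes little-endian host
  return std::string(buf, sizeof(buf)) + options_string;
}

bool Unpack(const std::string& data, uint32_t* version, std::string* rest) {
  if (data.size() <= sizeof(uint32_t)) return false;  // mirrors the size check
  std::memcpy(version, data.data(), sizeof(uint32_t));
  *rest = data.substr(sizeof(uint32_t));
  return *version == 1;  // kOptionsString
}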
+
+#ifndef NDEBUG
+bool CompactionServiceResult::TEST_Equals(CompactionServiceResult* other) {
+  std::string mismatch;
+  return TEST_Equals(other, &mismatch);
+}
+
+bool CompactionServiceResult::TEST_Equals(CompactionServiceResult* other,
+                                          std::string* mismatch) {
+  ConfigOptions cf;
+  cf.invoke_prepare_options = false;
+  return OptionTypeInfo::TypesAreEqual(cf, cs_result_type_info, this, other,
+                                       mismatch);
+}
+
+bool CompactionServiceInput::TEST_Equals(CompactionServiceInput* other) {
+  std::string mismatch;
+  return TEST_Equals(other, &mismatch);
+}
+
+bool CompactionServiceInput::TEST_Equals(CompactionServiceInput* other,
+                                         std::string* mismatch) {
+  ConfigOptions cf;
+  cf.invoke_prepare_options = false;
+  return OptionTypeInfo::TypesAreEqual(cf, cs_input_type_info, this, other,
+                                       mismatch);
+}
+#endif  // NDEBUG
+#endif  // !ROCKSDB_LITE
+
 }  // namespace ROCKSDB_NAMESPACE
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job.h	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job.h	2025-05-19 16:14:27.000000000 +0000
@@ -17,9 +17,9 @@
 #include <string>
 #include <vector>
 
+#include "db/blob/blob_file_completion_callback.h"
 #include "db/column_family.h"
 #include "db/compaction/compaction_iterator.h"
-#include "db/dbformat.h"
 #include "db/flush_scheduler.h"
 #include "db/internal_stats.h"
 #include "db/job_context.h"
@@ -50,6 +50,7 @@
 class ErrorHandler;
 class MemTable;
 class SnapshotChecker;
+class SystemClock;
 class TableCache;
 class Version;
 class VersionEdit;
@@ -62,25 +63,29 @@
 // if needed.
 class CompactionJob {
  public:
-  CompactionJob(int job_id, Compaction* compaction,
-                const ImmutableDBOptions& db_options,
-                const FileOptions& file_options, VersionSet* versions,
-                const std::atomic<bool>* shutting_down,
-                const SequenceNumber preserve_deletes_seqnum,
-                LogBuffer* log_buffer, Directory* db_directory,
-                Directory* output_directory, Statistics* stats,
-                InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
-                std::vector<SequenceNumber> existing_snapshots,
-                SequenceNumber earliest_write_conflict_snapshot,
-                const SnapshotChecker* snapshot_checker,
-                std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
-                bool paranoid_file_checks, bool measure_io_stats,
-                const std::string& dbname,
-                CompactionJobStats* compaction_job_stats,
-                Env::Priority thread_pri,
-                const std::atomic<bool>* manual_compaction_paused = nullptr);
+  CompactionJob(
+      int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
+      const MutableDBOptions& mutable_db_options,
+      const FileOptions& file_options, VersionSet* versions,
+      const std::atomic<bool>* shutting_down,
+      const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer,
+      FSDirectory* db_directory, FSDirectory* output_directory,
+      FSDirectory* blob_output_directory, Statistics* stats,
+      InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
+      std::vector<SequenceNumber> existing_snapshots,
+      SequenceNumber earliest_write_conflict_snapshot,
+      const SnapshotChecker* snapshot_checker,
+      std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
+      bool paranoid_file_checks, bool measure_io_stats,
+      const std::string& dbname, CompactionJobStats* compaction_job_stats,
+      Env::Priority thread_pri, const std::shared_ptr<IOTracer>& io_tracer,
+      const std::atomic<int>* manual_compaction_paused = nullptr,
+      const std::atomic<bool>* manual_compaction_canceled = nullptr,
+      const std::string& db_id = "", const std::string& db_session_id = "",
+      std::string full_history_ts_low = "",
+      BlobFileCompletionCallback* blob_callback = nullptr);
 
-  ~CompactionJob();
+  virtual ~CompactionJob();
 
   // no copy/move
   CompactionJob(CompactionJob&& job) = delete;
@@ -100,11 +105,39 @@
   // Add compaction input/output to the current version
   Status Install(const MutableCFOptions& mutable_cf_options);
 
- private:
+  // Return the IO status
+  IOStatus io_status() const { return io_status_; }
+
+ protected:
   struct SubcompactionState;
+  // CompactionJob state
+  struct CompactionState;
 
   void AggregateStatistics();
+  void UpdateCompactionStats();
+  void LogCompaction();
+  virtual void RecordCompactionIOStats();
+  void CleanupCompaction();
+
+  // Call compaction filter. Then iterate through input and compact the
+  // kv-pairs
+  void ProcessKeyValueCompaction(SubcompactionState* sub_compact);
+
+  CompactionState* compact_;
+  InternalStats::CompactionStats compaction_stats_;
+  const ImmutableDBOptions& db_options_;
+  const MutableDBOptions mutable_db_options_copy_;
+  LogBuffer* log_buffer_;
+  FSDirectory* output_directory_;
+  Statistics* stats_;
+  // Is this compaction creating a file in the bottom most level?
+  bool bottommost_level_;
+
+  Env::WriteLifeTimeHint write_hint_;
+
+  IOStatus io_status_;
+
+ private:
   // Generates a histogram representing potential divisions of key ranges from
   // the input. It adds the starting and/or ending keys of certain input files
   // to the working set and then finds the approximate size of data in between
@@ -112,12 +145,12 @@
   // consecutive groups such that each group has a similar size.
   void GenSubcompactionBoundaries();
 
+  CompactionServiceJobStatus ProcessKeyValueCompactionWithCompactionService(
+      SubcompactionState* sub_compact);
+
   // update the thread status for starting a compaction.
   void ReportStartedCompaction(Compaction* compaction);
   void AllocateCompactionOutputFileNumbers();
-  // Call compaction filter. Then iterate through input and compact the
-  // kv-pairs
-  void ProcessKeyValueCompaction(SubcompactionState* sub_compact);
 
   Status FinishCompactionOutputFile(
       const Status& input_status, SubcompactionState* sub_compact,
@@ -125,45 +158,37 @@
       CompactionIterationStats* range_del_out_stats,
       const Slice* next_table_min_key = nullptr);
   Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options);
-  void RecordCompactionIOStats();
   Status OpenCompactionOutputFile(SubcompactionState* sub_compact);
-  void CleanupCompaction();
   void UpdateCompactionJobStats(
       const InternalStats::CompactionStats& stats) const;
   void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats,
                          CompactionJobStats* compaction_job_stats = nullptr);
 
-  void UpdateCompactionStats();
   void UpdateCompactionInputStatsHelper(
       int* num_files, uint64_t* bytes_read, int input_level);
 
-  void LogCompaction();
-
-  int job_id_;
+  uint32_t job_id_;
 
-  // CompactionJob state
-  struct CompactionState;
-  CompactionState* compact_;
   CompactionJobStats* compaction_job_stats_;
-  InternalStats::CompactionStats compaction_stats_;
 
   // DBImpl state
   const std::string& dbname_;
-  const ImmutableDBOptions& db_options_;
+  const std::string db_id_;
+  const std::string db_session_id_;
   const FileOptions file_options_;
 
   Env* env_;
-  FileSystem* fs_;
+  std::shared_ptr<IOTracer> io_tracer_;
+  FileSystemPtr fs_;
   // env_option optimized for compaction table reads
   FileOptions file_options_for_read_;
   VersionSet* versions_;
   const std::atomic<bool>* shutting_down_;
-  const std::atomic<bool>* manual_compaction_paused_;
+  const std::atomic<int>* manual_compaction_paused_;
+  const std::atomic<bool>* manual_compaction_canceled_;
   const SequenceNumber preserve_deletes_seqnum_;
-  LogBuffer* log_buffer_;
-  Directory* db_directory_;
-  Directory* output_directory_;
-  Statistics* stats_;
+  FSDirectory* db_directory_;
+  FSDirectory* blob_output_directory_;
   InstrumentedMutex* db_mutex_;
   ErrorHandler* db_error_handler_;
   // If there were two snapshots with seq numbers s1 and
@@ -183,16 +208,158 @@
   EventLogger* event_logger_;
 
-  // Is this compaction creating a file in the bottom most level?
-  bool bottommost_level_;
   bool paranoid_file_checks_;
   bool measure_io_stats_;
   // Stores the Slices that designate the boundaries for each subcompaction
   std::vector<Slice> boundaries_;
   // Stores the approx size of keys covered in the range of each subcompaction
   std::vector<uint64_t> sizes_;
-  Env::WriteLifeTimeHint write_hint_;
   Env::Priority thread_pri_;
+  std::string full_history_ts_low_;
+  BlobFileCompletionCallback* blob_callback_;
+
+  uint64_t GetCompactionId(SubcompactionState* sub_compact);
+
+  // Get table file name in where it's outputting to, which should also be in
+  // `output_directory_`.
+  virtual std::string GetTableFileName(uint64_t file_number);
+};
+
+// CompactionServiceInput is used to pass compaction information between two
+// db instances. It contains the information needed to do a compaction. It
+// doesn't contain the LSM tree information, which is passed through the
+// MANIFEST file.
+struct CompactionServiceInput {
+  ColumnFamilyDescriptor column_family;
+
+  DBOptions db_options;
+
+  std::vector<SequenceNumber> snapshots;
+
+  // SST files for compaction; it should already be expanded to include all
+  // the files needed for this compaction, for both input level files and
+  // output level files.
+  std::vector<std::string> input_files;
+  int output_level;
+
+  // information for subcompaction
+  bool has_begin = false;
+  std::string begin;
+  bool has_end = false;
+  std::string end;
+  uint64_t approx_size = 0;
+
+  // serialization interface to read and write the object
+  static Status Read(const std::string& data_str, CompactionServiceInput* obj);
+  Status Write(std::string* output);
+
+  // Initialize a dummy ColumnFamilyDescriptor
+  CompactionServiceInput() : column_family("", ColumnFamilyOptions()) {}
+
+#ifndef NDEBUG
+  bool TEST_Equals(CompactionServiceInput* other);
+  bool TEST_Equals(CompactionServiceInput* other, std::string* mismatch);
+#endif  // NDEBUG
+};
+
+// CompactionServiceOutputFile is the metadata for the output SST file
+struct CompactionServiceOutputFile {
+  std::string file_name;
+  SequenceNumber smallest_seqno;
+  SequenceNumber largest_seqno;
+  std::string smallest_internal_key;
+  std::string largest_internal_key;
+  uint64_t oldest_ancester_time;
+  uint64_t file_creation_time;
+  uint64_t paranoid_hash;
+  bool marked_for_compaction;
+
+  CompactionServiceOutputFile() = default;
+  CompactionServiceOutputFile(
+      const std::string& name, SequenceNumber smallest, SequenceNumber largest,
+      std::string _smallest_internal_key, std::string _largest_internal_key,
+      uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
+      uint64_t _paranoid_hash, bool _marked_for_compaction)
+      : file_name(name),
+        smallest_seqno(smallest),
+        largest_seqno(largest),
+        smallest_internal_key(std::move(_smallest_internal_key)),
+        largest_internal_key(std::move(_largest_internal_key)),
+        oldest_ancester_time(_oldest_ancester_time),
+        file_creation_time(_file_creation_time),
+        paranoid_hash(_paranoid_hash),
+        marked_for_compaction(_marked_for_compaction) {}
+};
+
+// CompactionServiceResult contains the compaction result from a different db
+// instance. With this information, the primary db instance with write
+// permission is able to install the result to the DB.
+struct CompactionServiceResult {
+  Status status;
+  std::vector<CompactionServiceOutputFile> output_files;
+  int output_level;
+
+  // location of the output files
+  std::string output_path;
+
+  // some statistics about the compaction
+  uint64_t num_output_records = 0;
+  uint64_t total_bytes = 0;
+  uint64_t bytes_read = 0;
+  uint64_t bytes_written = 0;
+  CompactionJobStats stats;
+
+  // serialization interface to read and write the object
+  static Status Read(const std::string& data_str,
+                     CompactionServiceResult* obj);
+  Status Write(std::string* output);
+
+#ifndef NDEBUG
+  bool TEST_Equals(CompactionServiceResult* other);
+  bool TEST_Equals(CompactionServiceResult* other, std::string* mismatch);
+#endif  // NDEBUG
+};
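A hedged usage sketch for these structs, mirroring the InputSerialization test later in this diff. It relies on the internal db/compaction/compaction_job.h header inside ROCKSDB_NAMESPACE, so it is illustrative rather than a public-API recipe; the field values are made up:

CompactionServiceInput input;
input.column_family.name = "default";  // hypothetical values
input.output_level = 4;
input.approx_size = 1024 * 1024;

std::string payload;
Status s = input.Write(&payload);  // 4-byte version prefix + option string
CompactionServiceInput decoded;
if (s.ok()) {
  s = CompactionServiceInput::Read(payload, &decoded);
}
// On success, `decoded` compares equal to `input` (see TEST_Equals below).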
+// CompactionServiceCompactionJob is a read-only compaction job. It takes
+// input information from `compaction_service_input` and puts result
+// information in `compaction_service_result`; the SST files are generated
+// to `output_path`.
+class CompactionServiceCompactionJob : private CompactionJob {
+ public:
+  CompactionServiceCompactionJob(
+      int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
+      const MutableDBOptions& mutable_db_options,
+      const FileOptions& file_options, VersionSet* versions,
+      const std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
+      FSDirectory* output_directory, Statistics* stats,
+      InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
+      std::vector<SequenceNumber> existing_snapshots,
+      std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
+      const std::string& dbname, const std::shared_ptr<IOTracer>& io_tracer,
+      const std::string& db_id, const std::string& db_session_id,
+      const std::string& output_path,
+      const CompactionServiceInput& compaction_service_input,
+      CompactionServiceResult* compaction_service_result);
+
+  // Run the compaction in current thread and return the result
+  Status Run();
+
+  void CleanupCompaction();
+
+  IOStatus io_status() const { return CompactionJob::io_status(); }
+
+ protected:
+  void RecordCompactionIOStats() override;
+
+ private:
+  // Get table file name in output_path
+  std::string GetTableFileName(uint64_t file_number) override;
+  // Specifies the compaction output path; otherwise it uses the default DB
+  // path
+  const std::string output_path_;
+
+  // Compaction job input
+  const CompactionServiceInput& compaction_input_;
+
+  // Compaction job result
+  CompactionServiceResult* compaction_result_;
 };
 
 }  // namespace ROCKSDB_NAMESPACE
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc	2025-05-19 16:14:27.000000000 +0000
@@ -24,8 +24,6 @@
 #include "db/write_batch_internal.h"
 #include "env/mock_env.h"
 #include "file/filename.h"
-#include "logging/logging.h"
-#include "memtable/hash_linklist_rep.h"
 #include "monitoring/statistics.h"
 #include "monitoring/thread_status_util.h"
 #include "port/stack_trace.h"
@@ -52,6 +50,7 @@
 #include "test_util/sync_point.h"
 #include "test_util/testharness.h"
 #include "test_util/testutil.h"
+#include "util/cast_util.h"
 #include "util/compression.h"
 #include "util/hash.h"
 #include "util/mutexlock.h"
@@ -126,9 +125,7 @@
   static void SetUpTestCase() {}
   static void TearDownTestCase() {}
 
-  DBImpl* dbfull() {
-    return reinterpret_cast<DBImpl*>(db_);
-  }
+  DBImpl* dbfull() { return static_cast_with_check<DBImpl>(db_); }
 
   void CreateColumnFamilies(const std::vector<std::string>& cfs,
                             const Options& options) {
@@ -271,10 +268,10 @@
     if (cf == 0) {
       // default cfd
       EXPECT_TRUE(db_->GetProperty(
-          "rocksdb.num-files-at-level" + NumberToString(level), &property));
+          "rocksdb.num-files-at-level" + ToString(level), &property));
     } else {
       EXPECT_TRUE(db_->GetProperty(
-          handles_[cf], "rocksdb.num-files-at-level" + NumberToString(level),
+          handles_[cf], "rocksdb.num-files-at-level" + ToString(level),
          &property));
     }
     return atoi(property.c_str());
@@ -299,15 +296,14 @@
     return result;
   }
 
-  uint64_t Size(const Slice& start, const Slice& limit, int cf = 0) {
+  Status Size(uint64_t* size, const Slice& start, const Slice& limit,
+              int cf = 0) {
     Range r(start, limit);
-    uint64_t size;
     if (cf == 0) {
-      db_->GetApproximateSizes(&r, 1, &size);
+      return db_->GetApproximateSizes(&r, 1, size);
     } else {
-      db_->GetApproximateSizes(handles_[1], &r, 1, &size);
+      return db_->GetApproximateSizes(handles_[1], &r, 1, size);
     }
-    return size;
   }
 
   void Compact(int cf, const Slice& start, const Slice& limit,
@@ -460,6 +456,7 @@
     ASSERT_EQ(current_stats.num_output_files,
               stats.num_output_files);
 
+    ASSERT_EQ(current_stats.is_full_compaction, stats.is_full_compaction);
     ASSERT_EQ(current_stats.is_manual_compaction,
               stats.is_manual_compaction);
 
@@ -572,7 +569,7 @@
     uint64_t num_input_records, size_t key_size, size_t value_size,
     size_t num_output_files, uint64_t num_output_records,
     double compression_ratio, uint64_t num_records_replaced,
-    bool is_manual = true) {
+    bool is_full = false, bool is_manual = true) {
   CompactionJobStats stats;
   stats.Reset();
 
@@ -596,6 +593,7 @@
   stats.total_input_raw_value_bytes =
       num_input_records * value_size;
 
+  stats.is_full_compaction = is_full;
   stats.is_manual_compaction = is_manual;
 
   stats.num_records_replaced = num_records_replaced;
@@ -797,7 +795,7 @@
   }
   ASSERT_OK(Flush(1));
-  reinterpret_cast<DBImpl*>(db_)->TEST_WaitForCompact();
+  ASSERT_OK(static_cast_with_check<DBImpl>(db_)->TEST_WaitForCompact());
 
   stats_checker->set_verify_next_comp_io_stats(true);
   std::atomic<bool> first_prepare_write(true);
@@ -895,7 +893,7 @@
   CompactRangeOptions cr_options;
   cr_options.change_level = true;
   cr_options.target_level = 2;
-  db_->CompactRange(cr_options, handles_[1], nullptr, nullptr);
+  ASSERT_OK(db_->CompactRange(cr_options, handles_[1], nullptr, nullptr));
   ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);
 
   // Stage 2: Generate files including keys from the entire key range
@@ -982,26 +980,21 @@
     if (num_input_units == 0) {
      continue;
    }
+    // A full compaction only happens when the number of flushes equals
+    // the number of compaction input runs.
+    bool is_full = num_flushes == num_input_units;
     // The following statement determines the expected smallest key
-    // based on whether it is a full compaction. A full compaction only
-    // happens when the number of flushes equals to the number of compaction
-    // input runs.
-    uint64_t smallest_key =
-        (num_flushes == num_input_units) ?
-            key_base : key_base * (num_flushes - 1);
+    // based on whether it is a full compaction.
+    uint64_t smallest_key = is_full ? key_base : key_base * (num_flushes - 1);
 
-    stats_checker->AddExpectedStats(
-        NewManualCompactionJobStats(
-            Key(smallest_key, 10),
-            Key(smallest_key + key_base * num_input_units - key_interval, 10),
-            num_input_units,
-            num_input_units > 2 ? num_input_units / 2 : 0,
-            num_keys_per_table * num_input_units,
-            kKeySize, kValueSize,
-            num_input_units,
-            num_keys_per_table * num_input_units,
-            1.0, 0, false));
-    dbfull()->TEST_WaitForCompact();
+    stats_checker->AddExpectedStats(NewManualCompactionJobStats(
+        Key(smallest_key, 10),
+        Key(smallest_key + key_base * num_input_units - key_interval, 10),
+        num_input_units, num_input_units > 2 ? num_input_units / 2 : 0,
+        num_keys_per_table * num_input_units, kKeySize, kValueSize,
+        num_input_units, num_keys_per_table * num_input_units, 1.0, 0, is_full,
+        false));
+    ASSERT_OK(dbfull()->TEST_WaitForCompact());
   }
   ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 3U);
@@ -1012,7 +1005,7 @@
         &rnd, start_key, start_key + key_base - 1,
         kKeySize, kValueSize, key_interval,
         compression_ratio, 1);
-    reinterpret_cast<DBImpl*>(db_)->TEST_WaitForCompact();
+    ASSERT_OK(static_cast_with_check<DBImpl>(db_)->TEST_WaitForCompact());
   }
   ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 0U);
 }
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job_test.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_job_test.cc	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_job_test.cc	2025-05-19 16:14:27.000000000 +0000
@@ -5,6 +5,8 @@
 
 #ifndef ROCKSDB_LITE
 
+#include "db/compaction/compaction_job.h"
+
 #include <algorithm>
 #include <map>
 #include <string>
@@ -12,15 +14,16 @@
 #include <tuple>
 #include <vector>
 
-#include "db/blob_index.h"
+#include "db/blob/blob_index.h"
 #include "db/column_family.h"
-#include "db/compaction/compaction_job.h"
 #include "db/db_impl/db_impl.h"
 #include "db/error_handler.h"
 #include "db/version_set.h"
 #include "file/writable_file_writer.h"
 #include "rocksdb/cache.h"
+#include "rocksdb/convenience.h"
 #include "rocksdb/db.h"
+#include "rocksdb/file_system.h"
 #include "rocksdb/options.h"
 #include "rocksdb/write_buffer_manager.h"
 #include "table/mock_table.h"
@@ -67,30 +70,42 @@
 }  // namespace
 
-// TODO(icanadi) Make it simpler once we mock out VersionSet
-class CompactionJobTest : public testing::Test {
- public:
-  CompactionJobTest()
-      : env_(Env::Default()),
-        fs_(std::make_shared<LegacyFileSystemWrapper>(env_)),
-        dbname_(test::PerThreadDBPath("compaction_job_test")),
+class CompactionJobTestBase : public testing::Test {
+ protected:
+  CompactionJobTestBase(std::string dbname, const Comparator* ucmp,
+                        std::function<std::string(uint64_t)> encode_u64_ts)
+      : dbname_(std::move(dbname)),
+        ucmp_(ucmp),
         db_options_(),
         mutable_cf_options_(cf_options_),
+        mutable_db_options_(),
         table_cache_(NewLRUCache(50000, 16)),
         write_buffer_manager_(db_options_.db_write_buffer_size),
         versions_(new VersionSet(dbname_, &db_options_, env_options_,
                                  table_cache_.get(), &write_buffer_manager_,
                                  &write_controller_,
-                                 /*block_cache_tracer=*/nullptr)),
+                                 /*block_cache_tracer=*/nullptr,
+                                 /*io_tracer=*/nullptr, /*db_session_id*/ "")),
         shutting_down_(false),
         preserve_deletes_seqnum_(0),
         mock_table_factory_(new mock::MockTableFactory()),
-        error_handler_(nullptr, db_options_, &mutex_) {
+        error_handler_(nullptr, db_options_, &mutex_),
+        encode_u64_ts_(std::move(encode_u64_ts)) {
+    Env* base_env = Env::Default();
+    EXPECT_OK(
+        test::CreateEnvFromSystem(ConfigOptions(), &base_env, &env_guard_));
+    env_ = base_env;
+    fs_ = env_->GetFileSystem();
+  }
+
+  void SetUp() override {
     EXPECT_OK(env_->CreateDirIfMissing(dbname_));
     db_options_.env = env_;
     db_options_.fs = fs_;
     db_options_.db_paths.emplace_back(dbname_,
                                       std::numeric_limits<uint64_t>::max());
+    cf_options_.comparator = ucmp_;
+    cf_options_.table_factory = mock_table_factory_;
   }
 
   std::string GenerateFileName(uint64_t file_number) {
@@ -101,9 +116,10 @@
     return TableFileName(db_paths, meta.fd.GetNumber(), meta.fd.GetPathId());
   }
 
-  static std::string KeyStr(const std::string& user_key,
-                            const SequenceNumber seq_num, const ValueType t) {
-    return InternalKey(user_key, seq_num, t).Encode().ToString();
+  std::string KeyStr(const std::string& user_key, const SequenceNumber seq_num,
+                     const ValueType t, uint64_t ts = 0) {
+    std::string user_key_with_ts = user_key + encode_u64_ts_(ts);
+    return InternalKey(user_key_with_ts, seq_num, t).Encode().ToString();
   }
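The encode_u64_ts parameter introduced here lets timestamp-aware test subclasses append an encoded timestamp to each user key in KeyStr. One possible encoder, assuming a fixed 8-byte little-endian layout; the byte order must match whatever timestamp-aware comparator is actually installed:

#include <cstdint>
#include <string>

// Possible encoder for CompactionJobTestBase's encode_u64_ts parameter.
std::string EncodeU64TsLittleEndian(uint64_t ts) {
  std::string out(sizeof(uint64_t), '\0');
  for (size_t i = 0; i < sizeof(uint64_t); ++i) {
    out[i] = static_cast<char>((ts >> (8 * i)) & 0xff);  // LSB first
  }
  return out;
}
// The plain CompactionJobTest below instead passes a lambda returning "",
// i.e. keys carry no timestamp suffix.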
   static std::string BlobStr(uint64_t blob_file_number, uint64_t offset,
@@ -129,7 +145,7 @@
     return blob_index;
   }
 
-  void AddMockFile(const stl_wrappers::KVMap& contents, int level = 0) {
+  void AddMockFile(const mock::KVVector& contents, int level = 0) {
     assert(contents.size() > 0);
 
     bool first_key = true;
@@ -143,7 +159,8 @@
       std::string skey;
       std::string value;
       std::tie(skey, value) = kv;
-      bool parsed = ParseInternalKey(skey, &key);
+      const Status pik_status =
+          ParseInternalKey(skey, &key, true /* log_err_key */);
 
       smallest_seqno = std::min(smallest_seqno, key.sequence);
       largest_seqno = std::max(largest_seqno, key.sequence);
@@ -161,7 +178,7 @@
 
       first_key = false;
 
-      if (parsed && key.type == kTypeBlobIndex) {
+      if (pik_status.ok() && key.type == kTypeBlobIndex) {
         BlobIndex blob_index;
         const Status s = blob_index.DecodeFrom(value);
         if (!s.ok()) {
@@ -186,13 +203,16 @@
 
     VersionEdit edit;
     edit.AddFile(level, file_number, 0, 10, smallest_key, largest_key,
-                 smallest_seqno, largest_seqno, false, oldest_blob_file_number,
-                 kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                 kUnknownFileChecksum, kUnknownFileChecksumFuncName);
+                 smallest_seqno, largest_seqno, false, Temperature::kUnknown,
+                 oldest_blob_file_number, kUnknownOldestAncesterTime,
+                 kUnknownFileCreationTime, kUnknownFileChecksum,
+                 kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+                 kDisableUserTimestamp);
 
     mutex_.Lock();
-    versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
-                           mutable_cf_options_, &edit, &mutex_);
+    EXPECT_OK(
+        versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
+                               mutable_cf_options_, &edit, &mutex_));
     mutex_.Unlock();
   }
 
@@ -203,11 +223,11 @@
   }
 
   // returns expected result after compaction
-  stl_wrappers::KVMap CreateTwoFiles(bool gen_corrupted_keys) {
-    auto expected_results = mock::MakeMockFile();
-    const int kKeysPerFile = 10000;
-    const int kCorruptKeysPerFile = 200;
-    const int kMatchingKeys = kKeysPerFile / 2;
+  mock::KVVector CreateTwoFiles(bool gen_corrupted_keys) {
+    stl_wrappers::KVMap expected_results;
+    constexpr int kKeysPerFile = 10000;
+    constexpr int kCorruptKeysPerFile = 200;
+    constexpr int kMatchingKeys = kKeysPerFile / 2;
     SequenceNumber sequence_number = 0;
 
     auto corrupt_id = [&](int id) {
@@ -230,49 +250,51 @@
         test::CorruptKeyType(&internal_key);
         test::CorruptKeyType(&bottommost_internal_key);
       }
-      contents.insert({ internal_key.Encode().ToString(), value });
+      contents.push_back({internal_key.Encode().ToString(), value});
       if (i == 1 || k < kMatchingKeys || corrupt_id(k - kMatchingKeys)) {
         expected_results.insert(
-            { bottommost_internal_key.Encode().ToString(), value });
+            {bottommost_internal_key.Encode().ToString(), value});
       }
     }
+    mock::SortKVVector(&contents, ucmp_);
 
     AddMockFile(contents);
   }
 
   SetLastSequence(sequence_number);
 
-  return expected_results;
+  mock::KVVector expected_results_kvvector;
+  for (auto& kv : expected_results) {
+    expected_results_kvvector.push_back({kv.first, kv.second});
+  }
+
+  return expected_results_kvvector;
 }
 
 void NewDB() {
-  EXPECT_OK(DestroyDB(dbname_, Options()));
+  EXPECT_OK(DestroyDB(dbname_, Options()));
   EXPECT_OK(env_->CreateDirIfMissing(dbname_));
-  versions_.reset(new VersionSet(dbname_, &db_options_, env_options_,
-                                 table_cache_.get(), &write_buffer_manager_,
-                                 &write_controller_,
-                                 /*block_cache_tracer=*/nullptr));
+    versions_.reset(
+        new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
+                       &write_buffer_manager_, &write_controller_,
+                       /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
+                       /*db_session_id*/ ""));
     compaction_job_stats_.Reset();
-    SetIdentityFile(env_, dbname_);
+    ASSERT_OK(SetIdentityFile(env_, dbname_));
 
     VersionEdit new_db;
-    if (db_options_.write_dbid_to_manifest) {
-      DBImpl* impl = new DBImpl(DBOptions(), dbname_);
-      std::string db_id;
-      impl->GetDbIdentityFromIdentityFile(&db_id);
-      new_db.SetDBId(db_id);
-    }
     new_db.SetLogNumber(0);
     new_db.SetNextFile(2);
     new_db.SetLastSequence(0);
 
     const std::string manifest = DescriptorFileName(dbname_, 1);
-    std::unique_ptr<WritableFile> file;
-    Status s = env_->NewWritableFile(
-        manifest, &file, env_->OptimizeForManifestWrite(env_options_));
+    std::unique_ptr<WritableFileWriter> file_writer;
+    const auto& fs = env_->GetFileSystem();
+    Status s = WritableFileWriter::Create(
+        fs, manifest, fs->OptimizeForManifestWrite(env_options_), &file_writer,
+        nullptr);
+
     ASSERT_OK(s);
-    std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
-        NewLegacyWritableFileWrapper(std::move(file)), manifest, env_options_));
     {
       log::Writer log(std::move(file_writer), 0, false);
       std::string record;
@@ -281,21 +303,22 @@
     }
     ASSERT_OK(s);
     // Make "CURRENT" file that points to the new manifest file.
-    s = SetCurrentFile(env_, dbname_, 1, nullptr);
+    s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+
+    ASSERT_OK(s);
 
-    std::vector<ColumnFamilyDescriptor> column_families;
-    cf_options_.table_factory = mock_table_factory_;
     cf_options_.merge_operator = merge_op_;
     cf_options_.compaction_filter = compaction_filter_.get();
+    std::vector<ColumnFamilyDescriptor> column_families;
     column_families.emplace_back(kDefaultColumnFamilyName, cf_options_);
 
-    EXPECT_OK(versions_->Recover(column_families, false));
+    ASSERT_OK(versions_->Recover(column_families, false));
     cfd_ = versions_->GetColumnFamilySet()->GetDefault();
   }
 
   void RunCompaction(
       const std::vector<std::vector<FileMetaData*>>& input_files,
-      const stl_wrappers::KVMap& expected_results,
+      const mock::KVVector& expected_results,
      const std::vector<SequenceNumber>& snapshots = {},
      SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber,
      int output_level = 1, bool verify = true,
@@ -314,11 +337,12 @@
       num_input_files += level_files.size();
     }
 
-    Compaction compaction(cfd->current()->storage_info(), *cfd->ioptions(),
-                          *cfd->GetLatestMutableCFOptions(),
-                          compaction_input_files, output_level, 1024 * 1024,
-                          10 * 1024 * 1024, 0, kNoCompression,
-                          cfd->ioptions()->compression_opts, 0, {}, true);
+    Compaction compaction(
+        cfd->current()->storage_info(), *cfd->ioptions(),
+        *cfd->GetLatestMutableCFOptions(), mutable_db_options_,
+        compaction_input_files, output_level, 1024 * 1024, 10 * 1024 * 1024, 0,
+        kNoCompression, cfd->GetLatestMutableCFOptions()->compression_opts,
+        Temperature::kUnknown, 0, {}, true);
     compaction.SetInputVersion(cfd->current());
 
     LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get());
@@ -326,22 +350,28 @@
     EventLogger event_logger(db_options_.info_log.get());
     // TODO(yiwu) add a mock snapshot checker and add test for it.
SnapshotChecker* snapshot_checker = nullptr; + ASSERT_TRUE(full_history_ts_low_.empty() || + ucmp_->timestamp_size() == full_history_ts_low_.size()); CompactionJob compaction_job( - 0, &compaction, db_options_, env_options_, versions_.get(), - &shutting_down_, preserve_deletes_seqnum_, &log_buffer, nullptr, - nullptr, nullptr, &mutex_, &error_handler_, snapshots, + 0, &compaction, db_options_, mutable_db_options_, env_options_, + versions_.get(), &shutting_down_, preserve_deletes_seqnum_, &log_buffer, + nullptr, nullptr, nullptr, nullptr, &mutex_, &error_handler_, snapshots, earliest_write_conflict_snapshot, snapshot_checker, table_cache_, &event_logger, false, false, dbname_, &compaction_job_stats_, - Env::Priority::USER); + Env::Priority::USER, nullptr /* IOTracer */, + /*manual_compaction_paused=*/nullptr, + /*manual_compaction_canceled=*/nullptr, /*db_id=*/"", + /*db_session_id=*/"", full_history_ts_low_); VerifyInitializationOfCompactionJobStats(compaction_job_stats_); compaction_job.Prepare(); mutex_.Unlock(); - Status s; - s = compaction_job.Run(); + Status s = compaction_job.Run(); ASSERT_OK(s); + ASSERT_OK(compaction_job.io_status()); mutex_.Lock(); ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions())); + ASSERT_OK(compaction_job.io_status()); mutex_.Unlock(); if (verify) { @@ -363,13 +393,16 @@ } } + std::shared_ptr<Env> env_guard_; Env* env_; std::shared_ptr<FileSystem> fs_; std::string dbname_; + const Comparator* const ucmp_; EnvOptions env_options_; ImmutableDBOptions db_options_; ColumnFamilyOptions cf_options_; MutableCFOptions mutable_cf_options_; + MutableDBOptions mutable_db_options_; std::shared_ptr<Cache> table_cache_; WriteController write_controller_; WriteBufferManager write_buffer_manager_; @@ -383,6 +416,17 @@ std::unique_ptr<CompactionFilter> compaction_filter_; std::shared_ptr<MergeOperator> merge_op_; ErrorHandler error_handler_; + std::string full_history_ts_low_; + const std::function<std::string(uint64_t)> encode_u64_ts_; +}; + +// TODO(icanadi) Make it simpler once we mock out VersionSet +class CompactionJobTest : public CompactionJobTestBase { + public: + CompactionJobTest() + : CompactionJobTestBase(test::PerThreadDBPath("compaction_job_test"), + BytewiseComparator(), + [](uint64_t /*ts*/) { return ""; }) {} }; TEST_F(CompactionJobTest, Simple) { @@ -395,7 +439,7 @@ RunCompaction({ files }, expected_results); } -TEST_F(CompactionJobTest, SimpleCorrupted) { +TEST_F(CompactionJobTest, DISABLED_SimpleCorrupted) { NewDB(); auto expected_results = CreateTwoFiles(true); @@ -636,7 +680,7 @@ SetLastSequence(11U); auto files = cfd_->current()->storage_info()->LevelFiles(0); - stl_wrappers::KVMap empty_map; + mock::KVVector empty_map; RunCompaction({files}, empty_map); } @@ -989,7 +1033,7 @@ // single deletion and the (single) deletion gets removed while the corrupt key // gets written out. TODO(noetzli): We probably want a better way to treat // corrupt keys.
-TEST_F(CompactionJobTest, CorruptionAfterDeletion) { +TEST_F(CompactionJobTest, DISABLED_CorruptionAfterDeletion) { NewDB(); auto file1 = @@ -1063,10 +1107,312 @@ /* expected_oldest_blob_file_number */ 19); } +TEST_F(CompactionJobTest, InputSerialization) { + // Setup a random CompactionServiceInput + CompactionServiceInput input; + const int kStrMaxLen = 1000; + Random rnd(static_cast<uint32_t>(time(nullptr))); + Random64 rnd64(time(nullptr)); + input.column_family.name = rnd.RandomString(rnd.Uniform(kStrMaxLen)); + input.column_family.options.comparator = ReverseBytewiseComparator(); + input.column_family.options.max_bytes_for_level_base = + rnd64.Uniform(UINT64_MAX); + input.column_family.options.disable_auto_compactions = rnd.OneIn(2); + input.column_family.options.compression = kZSTD; + input.column_family.options.compression_opts.level = 4; + input.db_options.max_background_flushes = 10; + input.db_options.paranoid_checks = rnd.OneIn(2); + input.db_options.statistics = CreateDBStatistics(); + input.db_options.env = env_; + while (!rnd.OneIn(10)) { + input.snapshots.emplace_back(rnd64.Uniform(UINT64_MAX)); + } + while (!rnd.OneIn(10)) { + input.input_files.emplace_back(rnd.RandomString( + rnd.Uniform(kStrMaxLen - 1) + + 1)); // input file name should have at least one character + } + input.output_level = 4; + input.has_begin = rnd.OneIn(2); + if (input.has_begin) { + input.begin = rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)); + } + input.has_end = rnd.OneIn(2); + if (input.has_end) { + input.end = rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)); + } + input.approx_size = rnd64.Uniform(UINT64_MAX); + + std::string output; + ASSERT_OK(input.Write(&output)); + + // Test deserialization + CompactionServiceInput deserialized1; + ASSERT_OK(CompactionServiceInput::Read(output, &deserialized1)); + ASSERT_TRUE(deserialized1.TEST_Equals(&input)); + + // Test mismatch + deserialized1.db_options.max_background_flushes += 10; + std::string mismatch; + ASSERT_FALSE(deserialized1.TEST_Equals(&input, &mismatch)); + ASSERT_EQ(mismatch, "db_options.max_background_flushes"); + + // Test unknown field + CompactionServiceInput deserialized2; + output.clear(); + ASSERT_OK(input.Write(&output)); + output.append("new_field=123;"); + + ASSERT_OK(CompactionServiceInput::Read(output, &deserialized2)); + ASSERT_TRUE(deserialized2.TEST_Equals(&input)); + + // Test missing field + CompactionServiceInput deserialized3; + deserialized3.output_level = 0; + std::string to_remove = "output_level=4;"; + size_t pos = output.find(to_remove); + ASSERT_TRUE(pos != std::string::npos); + output.erase(pos, to_remove.length()); + ASSERT_OK(CompactionServiceInput::Read(output, &deserialized3)); + mismatch.clear(); + ASSERT_FALSE(deserialized3.TEST_Equals(&input, &mismatch)); + ASSERT_EQ(mismatch, "output_level"); + + // manually set the value back, should match the original structure + deserialized3.output_level = 4; + ASSERT_TRUE(deserialized3.TEST_Equals(&input)); + + // Test invalid version + output.clear(); + ASSERT_OK(input.Write(&output)); + + uint32_t data_version = DecodeFixed32(output.data()); + const size_t kDataVersionSize = sizeof(data_version); + ASSERT_EQ(data_version, + 1U); // Update once the default data version is changed + char buf[kDataVersionSize]; + EncodeFixed32(buf, data_version + 10); // make sure it's not valid + output.replace(0, kDataVersionSize, buf, kDataVersionSize); + Status s = CompactionServiceInput::Read(output, &deserialized3); + ASSERT_TRUE(s.IsNotSupported()); +} + +TEST_F(CompactionJobTest,
ResultSerialization) { + // Setup a random CompactionServiceResult + CompactionServiceResult result; + const int kStrMaxLen = 1000; + Random rnd(static_cast<uint32_t>(time(nullptr))); + Random64 rnd64(time(nullptr)); + std::vector<Status> status_list = { + Status::OK(), + Status::InvalidArgument("invalid option"), + Status::Aborted("failed to run"), + Status::NotSupported("not supported option"), + }; + result.status = + status_list.at(rnd.Uniform(static_cast<int>(status_list.size()))); + while (!rnd.OneIn(10)) { + result.output_files.emplace_back( + rnd.RandomString(rnd.Uniform(kStrMaxLen)), rnd64.Uniform(UINT64_MAX), + rnd64.Uniform(UINT64_MAX), + rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)), + rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)), + rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX), + rnd64.Uniform(UINT64_MAX), rnd.OneIn(2)); + } + result.output_level = rnd.Uniform(10); + result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen)); + result.num_output_records = rnd64.Uniform(UINT64_MAX); + result.total_bytes = rnd64.Uniform(UINT64_MAX); + result.bytes_read = 123; + result.bytes_written = rnd64.Uniform(UINT64_MAX); + result.stats.elapsed_micros = rnd64.Uniform(UINT64_MAX); + result.stats.num_output_files = rnd.Uniform(1000); + result.stats.is_full_compaction = rnd.OneIn(2); + result.stats.num_single_del_mismatch = rnd64.Uniform(UINT64_MAX); + result.stats.num_input_files = 9; + + std::string output; + ASSERT_OK(result.Write(&output)); + + // Test deserialization + CompactionServiceResult deserialized1; + ASSERT_OK(CompactionServiceResult::Read(output, &deserialized1)); + ASSERT_TRUE(deserialized1.TEST_Equals(&result)); + + // Test mismatch + deserialized1.stats.num_input_files += 10; + std::string mismatch; + ASSERT_FALSE(deserialized1.TEST_Equals(&result, &mismatch)); + ASSERT_EQ(mismatch, "stats.num_input_files"); + + // Test unknown field + CompactionServiceResult deserialized2; + output.clear(); + ASSERT_OK(result.Write(&output)); + output.append("new_field=123;"); + + ASSERT_OK(CompactionServiceResult::Read(output, &deserialized2)); + ASSERT_TRUE(deserialized2.TEST_Equals(&result)); + + // Test missing field + CompactionServiceResult deserialized3; + deserialized3.bytes_read = 0; + std::string to_remove = "bytes_read=123;"; + size_t pos = output.find(to_remove); + ASSERT_TRUE(pos != std::string::npos); + output.erase(pos, to_remove.length()); + ASSERT_OK(CompactionServiceResult::Read(output, &deserialized3)); + mismatch.clear(); + ASSERT_FALSE(deserialized3.TEST_Equals(&result, &mismatch)); + ASSERT_EQ(mismatch, "bytes_read"); + + deserialized3.bytes_read = 123; + ASSERT_TRUE(deserialized3.TEST_Equals(&result)); + + // Test invalid version + output.clear(); + ASSERT_OK(result.Write(&output)); + + uint32_t data_version = DecodeFixed32(output.data()); + const size_t kDataVersionSize = sizeof(data_version); + ASSERT_EQ(data_version, + 1U); // Update once the default data version is changed + char buf[kDataVersionSize]; + EncodeFixed32(buf, data_version + 10); // make sure it's not valid + output.replace(0, kDataVersionSize, buf, kDataVersionSize); + Status s = CompactionServiceResult::Read(output, &deserialized3); + ASSERT_TRUE(s.IsNotSupported()); + for (const auto& item : status_list) { + item.PermitUncheckedError(); + } +} + +class CompactionJobTimestampTest : public CompactionJobTestBase { + public: + CompactionJobTimestampTest() + : CompactionJobTestBase(test::PerThreadDBPath("compaction_job_ts_test"), + test::ComparatorWithU64Ts(), test::EncodeInt) {} +}; +
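The CompactionJobTimestampTest fixture above pairs test::ComparatorWithU64Ts() with test::EncodeInt, so every KeyStr() call in the tests that follow carries a 64-bit timestamp alongside the sequence number, and full_history_ts_low_ serves as the garbage-collection watermark. A minimal standalone C++ sketch of that relationship, with AppendTs and EligibleForGC as hypothetical helpers rather than RocksDB's real encoding:

// Standalone sketch, not RocksDB's implementation: a u64 timestamp rides as a
// fixed-width suffix on the user key, and only entries whose timestamp falls
// below full_history_ts_low become eligible for collapse during compaction.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

std::string AppendTs(const std::string& user_key, uint64_t ts) {
  char buf[sizeof(ts)];
  std::memcpy(buf, &ts, sizeof(ts));  // byte order simplified for the sketch
  return user_key + std::string(buf, sizeof(buf));
}

bool EligibleForGC(uint64_t ts, uint64_t full_history_ts_low) {
  return ts < full_history_ts_low;  // history at/above the watermark is kept
}

int main() {
  std::cout << AppendTs("a", 50).size() << "\n";  // 9: 1-byte key + 8-byte ts
  // With the watermark at 49, as in SomeKeysExpired below, ts 48 is
  // collapsible (prints 1) while ts 50 must be preserved (prints 0).
  std::cout << EligibleForGC(48, 49) << EligibleForGC(50, 49) << "\n";
}

This matches the behavior the tests assert: with full_history_ts_low_ = encode_u64_ts_(49), the entries stamped 48 and 46 collapse while those stamped 49 and 50 survive unchanged.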
+TEST_F(CompactionJobTimestampTest, GCDisabled) { + NewDB(); + + auto file1 = + mock::MakeMockFile({{KeyStr("a", 10, ValueType::kTypeValue, 100), "a10"}, + {KeyStr("a", 9, ValueType::kTypeValue, 99), "a9"}, + {KeyStr("b", 8, ValueType::kTypeValue, 98), "b8"}, + {KeyStr("d", 7, ValueType::kTypeValue, 97), "d7"}}); + + AddMockFile(file1); + + auto file2 = mock::MakeMockFile( + {{KeyStr("b", 6, ValueType::kTypeDeletionWithTimestamp, 96), ""}, + {KeyStr("c", 5, ValueType::kTypeDeletionWithTimestamp, 95), ""}, + {KeyStr("c", 4, ValueType::kTypeValue, 94), "c5"}, + {KeyStr("d", 3, ValueType::kTypeSingleDeletion, 93), ""}}); + AddMockFile(file2); + + SetLastSequence(10); + + auto expected_results = mock::MakeMockFile( + {{KeyStr("a", 10, ValueType::kTypeValue, 100), "a10"}, + {KeyStr("a", 9, ValueType::kTypeValue, 99), "a9"}, + {KeyStr("b", 8, ValueType::kTypeValue, 98), "b8"}, + {KeyStr("b", 6, ValueType::kTypeDeletionWithTimestamp, 96), ""}, + {KeyStr("c", 5, ValueType::kTypeDeletionWithTimestamp, 95), ""}, + {KeyStr("c", 4, ValueType::kTypeValue, 94), "c5"}, + {KeyStr("d", 7, ValueType::kTypeValue, 97), "d7"}, + {KeyStr("d", 3, ValueType::kTypeSingleDeletion, 93), ""}}); + const auto& files = cfd_->current()->storage_info()->LevelFiles(0); + RunCompaction({files}, expected_results); +} + +TEST_F(CompactionJobTimestampTest, NoKeyExpired) { + NewDB(); + + auto file1 = + mock::MakeMockFile({{KeyStr("a", 6, ValueType::kTypeValue, 100), "a6"}, + {KeyStr("b", 7, ValueType::kTypeValue, 101), "b7"}, + {KeyStr("c", 5, ValueType::kTypeValue, 99), "c5"}}); + AddMockFile(file1); + + auto file2 = + mock::MakeMockFile({{KeyStr("a", 4, ValueType::kTypeValue, 98), "a4"}, + {KeyStr("c", 3, ValueType::kTypeValue, 97), "c3"}}); + AddMockFile(file2); + + SetLastSequence(101); + + auto expected_results = + mock::MakeMockFile({{KeyStr("a", 6, ValueType::kTypeValue, 100), "a6"}, + {KeyStr("a", 4, ValueType::kTypeValue, 98), "a4"}, + {KeyStr("b", 7, ValueType::kTypeValue, 101), "b7"}, + {KeyStr("c", 5, ValueType::kTypeValue, 99), "c5"}, + {KeyStr("c", 3, ValueType::kTypeValue, 97), "c3"}}); + const auto& files = cfd_->current()->storage_info()->LevelFiles(0); + + full_history_ts_low_ = encode_u64_ts_(0); + RunCompaction({files}, expected_results); +} + +TEST_F(CompactionJobTimestampTest, AllKeysExpired) { + NewDB(); + + auto file1 = mock::MakeMockFile( + {{KeyStr("a", 5, ValueType::kTypeDeletionWithTimestamp, 100), ""}, + {KeyStr("b", 6, ValueType::kTypeSingleDeletion, 99), ""}, + {KeyStr("c", 7, ValueType::kTypeValue, 98), "c7"}}); + AddMockFile(file1); + + auto file2 = mock::MakeMockFile( + {{KeyStr("a", 4, ValueType::kTypeValue, 97), "a4"}, + {KeyStr("b", 3, ValueType::kTypeValue, 96), "b3"}, + {KeyStr("c", 2, ValueType::kTypeDeletionWithTimestamp, 95), ""}, + {KeyStr("c", 1, ValueType::kTypeValue, 94), "c1"}}); + AddMockFile(file2); + + SetLastSequence(7); + + auto expected_results = + mock::MakeMockFile({{KeyStr("c", 0, ValueType::kTypeValue, 0), "c7"}}); + const auto& files = cfd_->current()->storage_info()->LevelFiles(0); + + full_history_ts_low_ = encode_u64_ts_(std::numeric_limits<uint64_t>::max()); + RunCompaction({files}, expected_results); +} + +TEST_F(CompactionJobTimestampTest, SomeKeysExpired) { + NewDB(); + + auto file1 = + mock::MakeMockFile({{KeyStr("a", 5, ValueType::kTypeValue, 50), "a5"}, + {KeyStr("b", 6, ValueType::kTypeValue, 49), "b6"}}); + AddMockFile(file1); + + auto file2 = mock::MakeMockFile( + {{KeyStr("a", 3, ValueType::kTypeValue, 48), "a3"}, + {KeyStr("a", 2, ValueType::kTypeValue, 46), "a2"},
+ {KeyStr("b", 4, ValueType::kTypeDeletionWithTimestamp, 47), ""}}); + AddMockFile(file2); + + SetLastSequence(6); + + auto expected_results = + mock::MakeMockFile({{KeyStr("a", 5, ValueType::kTypeValue, 50), "a5"}, + {KeyStr("a", 0, ValueType::kTypeValue, 0), "a3"}, + {KeyStr("b", 6, ValueType::kTypeValue, 49), "b6"}}); + const auto& files = cfd_->current()->storage_info()->LevelFiles(0); + + full_history_ts_low_ = encode_u64_ts_(49); + RunCompaction({files}, expected_results); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); + RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker.cc 2025-05-19 16:14:27.000000000 +0000 @@ -15,9 +15,11 @@ #include #include #include + #include "db/column_family.h" #include "file/filename.h" #include "logging/log_buffer.h" +#include "logging/logging.h" #include "monitoring/statistics.h" #include "test_util/sync_point.h" #include "util/random.h" @@ -110,9 +112,9 @@ // If bottommost_compression is set and we are compacting to the // bottommost level then we should use it. - if (ioptions.bottommost_compression != kDisableCompressionOption && + if (mutable_cf_options.bottommost_compression != kDisableCompressionOption && level >= (vstorage->num_non_empty_levels() - 1)) { - return ioptions.bottommost_compression; + return mutable_cf_options.bottommost_compression; } // If the user has specified a different compression level for each level, // then pick the compression for that level. @@ -132,25 +134,23 @@ } } -CompressionOptions GetCompressionOptions(const ImmutableCFOptions& ioptions, +CompressionOptions GetCompressionOptions(const MutableCFOptions& cf_options, const VersionStorageInfo* vstorage, int level, const bool enable_compression) { if (!enable_compression) { - return ioptions.compression_opts; + return cf_options.compression_opts; } - // If bottommost_compression is set and we are compacting to the - // bottommost level then we should use the specified compression options - // for the bottmomost_compression. - if (ioptions.bottommost_compression != kDisableCompressionOption && - level >= (vstorage->num_non_empty_levels() - 1) && - ioptions.bottommost_compression_opts.enabled) { - return ioptions.bottommost_compression_opts; + // If bottommost_compression_opts is enabled and we are compacting to the + // bottommost level then we should use the specified compression options. 
+ if (level >= (vstorage->num_non_empty_levels() - 1) && + cf_options.bottommost_compression_opts.enabled) { + return cf_options.bottommost_compression_opts; } - return ioptions.compression_opts; + return cf_options.compression_opts; } -CompactionPicker::CompactionPicker(const ImmutableCFOptions& ioptions, +CompactionPicker::CompactionPicker(const ImmutableOptions& ioptions, const InternalKeyComparator* icmp) : ioptions_(ioptions), icmp_(icmp) {} @@ -332,7 +332,7 @@ const CompactionOptions& compact_options, const std::vector<CompactionInputFiles>& input_files, int output_level, VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options, - uint32_t output_path_id) { + const MutableDBOptions& mutable_db_options, uint32_t output_path_id) { assert(input_files.size()); // This compaction output should not overlap with a running compaction as // `SanitizeCompactionInputFiles` should've checked earlier and db mutex @@ -356,11 +356,11 @@ compression_type = compact_options.compression; } auto c = new Compaction( - vstorage, ioptions_, mutable_cf_options, input_files, output_level, - compact_options.output_file_size_limit, + vstorage, ioptions_, mutable_cf_options, mutable_db_options, input_files, + output_level, compact_options.output_file_size_limit, mutable_cf_options.max_compaction_bytes, output_path_id, compression_type, - GetCompressionOptions(ioptions_, vstorage, output_level), - compact_options.max_subcompactions, + GetCompressionOptions(mutable_cf_options, vstorage, output_level), + Temperature::kUnknown, compact_options.max_subcompactions, /* grandparents */ {}, true); RegisterCompaction(c); return c; } @@ -532,7 +532,7 @@ } } if (expand_inputs) { - ROCKS_LOG_INFO(ioptions_.info_log, + ROCKS_LOG_INFO(ioptions_.logger, "[%s] Expanding@%d %" ROCKSDB_PRIszt "+%" ROCKSDB_PRIszt "(%" PRIu64 "+%" PRIu64 " bytes) to %" ROCKSDB_PRIszt "+%" ROCKSDB_PRIszt " (%" PRIu64 "+%" PRIu64 " bytes)\n", @@ -554,16 +554,21 @@ InternalKey start, limit; GetRange(inputs, output_level_inputs, &start, &limit); // Compute the set of grandparent files that overlap this compaction - // (parent == level+1; grandparent == level+2) - if (output_level_inputs.level + 1 < NumberLevels()) { - vstorage->GetOverlappingInputs(output_level_inputs.level + 1, &start, - &limit, grandparents); + // (parent == level+1; grandparent == level+2 or the first + // level after that has overlapping files) + for (int level = output_level_inputs.level + 1; level < NumberLevels(); + level++) { + vstorage->GetOverlappingInputs(level, &start, &limit, grandparents); + if (!grandparents->empty()) { + break; + } } } Compaction* CompactionPicker::CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, int input_level, int output_level, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, uint64_t max_file_num_to_ignore) { @@ -626,18 +631,20 @@ } Compaction* c = new Compaction( - vstorage, ioptions_, mutable_cf_options, std::move(inputs), - output_level, + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options, output_level, ioptions_.compaction_style), /* max_compaction_bytes */ LLONG_MAX, compact_range_options.target_path_id, GetCompressionType(ioptions_, vstorage, mutable_cf_options, output_level, 1), -
GetCompressionOptions(ioptions_, vstorage, output_level), - compact_range_options.max_subcompactions, /* grandparents */ {}, + GetCompressionOptions(mutable_cf_options, vstorage, output_level), + Temperature::kUnknown, compact_range_options.max_subcompactions, + /* grandparents */ {}, /* is manual */ true); RegisterCompaction(c); + vstorage->ComputeCompactionScore(ioptions_, mutable_cf_options); return c; } @@ -670,17 +677,41 @@ // two files overlap. if (input_level > 0) { const uint64_t limit = mutable_cf_options.max_compaction_bytes; - uint64_t total = 0; + uint64_t input_level_total = 0; + int hint_index = -1; + InternalKey* smallest = nullptr; + InternalKey* largest = nullptr; for (size_t i = 0; i + 1 < inputs.size(); ++i) { + if (!smallest) { + smallest = &inputs[i]->smallest; + } + largest = &inputs[i]->largest; + uint64_t s = inputs[i]->compensated_file_size; - total += s; - if (total >= limit) { + uint64_t output_level_total = 0; + if (output_level < vstorage->num_non_empty_levels()) { + std::vector<FileMetaData*> files; + vstorage->GetOverlappingInputsRangeBinarySearch( + output_level, smallest, largest, &files, hint_index, &hint_index); + for (const auto& file : files) { + output_level_total += file->compensated_file_size; + } + } + + input_level_total += s; + + if (input_level_total + output_level_total >= limit) { covering_the_whole_range = false; + // still include the current file, so the compaction could be larger + // than max_compaction_bytes, which is also to make sure the compaction + // can make progress even `max_compaction_bytes` is small (e.g. smaller + // than an SST file). inputs.files.resize(i + 1); break; } } } + assert(compact_range_options.target_path_id < static_cast<uint32_t>(ioptions_.cf_paths.size())); @@ -778,8 +809,8 @@ std::vector<FileMetaData*> grandparents; GetGrandparents(vstorage, inputs, output_level_inputs, &grandparents); Compaction* compaction = new Compaction( - vstorage, ioptions_, mutable_cf_options, std::move(compaction_inputs), - output_level, + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(compaction_inputs), output_level, MaxFileSizeForLevel(mutable_cf_options, output_level, ioptions_.compaction_style, vstorage->base_level(), ioptions_.level_compaction_dynamic_level_bytes), @@ -787,8 +818,9 @@ compact_range_options.target_path_id, GetCompressionType(ioptions_, vstorage, mutable_cf_options, output_level, vstorage->base_level()), - GetCompressionOptions(ioptions_, vstorage, output_level), - compact_range_options.max_subcompactions, std::move(grandparents), + GetCompressionOptions(mutable_cf_options, vstorage, output_level), + Temperature::kUnknown, compact_range_options.max_subcompactions, + std::move(grandparents), /* is manual compaction */ true); TEST_SYNC_POINT_CALLBACK("CompactionPicker::CompactRange:Return", compaction); @@ -1004,6 +1036,7 @@ // any currently-existing files.
for (auto file_num : *input_files) { bool found = false; + int input_file_level = -1; for (const auto& level_meta : cf_meta.levels) { for (const auto& file_meta : level_meta.files) { if (file_num == TableFileNameToNumber(file_meta.name)) { @@ -1013,6 +1046,7 @@ " is already being compacted."); } found = true; + input_file_level = level_meta.level; break; } } @@ -1025,6 +1059,13 @@ "Specified compaction input file " + MakeTableFileName("", file_num) + " does not exist in column family " + cf_meta.name + "."); } + if (input_file_level > output_level) { + return Status::InvalidArgument( + "Cannot compact file to up level, input file: " + + MakeTableFileName("", file_num) + " level " + + ToString(input_file_level) + " > output level " + + ToString(output_level)); + } } return Status::OK(); @@ -1043,6 +1084,8 @@ level0_compactions_in_progress_.insert(c); } compactions_in_progress_.insert(c); + TEST_SYNC_POINT_CALLBACK("CompactionPicker::RegisterCompaction:Registered", + c); } void CompactionPicker::UnregisterCompaction(Compaction* c) { @@ -1085,6 +1128,8 @@ Random64 rnd(/* seed */ reinterpret_cast<uint64_t>(vstorage)); size_t random_file_index = static_cast<size_t>(rnd.Uniform( static_cast<uint64_t>(vstorage->FilesMarkedForCompaction().size()))); + TEST_SYNC_POINT_CALLBACK("CompactionPicker::PickFilesMarkedForCompaction", + &random_file_index); if (continuation(vstorage->FilesMarkedForCompaction()[random_file_index])) { // found the compaction! diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker.h 2025-05-19 16:14:27.000000000 +0000 @@ -46,7 +46,7 @@ // compaction style specific logic for them. class CompactionPicker { public: - CompactionPicker(const ImmutableCFOptions& ioptions, + CompactionPicker(const ImmutableOptions& ioptions, const InternalKeyComparator* icmp); virtual ~CompactionPicker(); @@ -56,7 +56,8 @@ // describes the compaction. Caller should delete the result. virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) = 0; // Return a compaction object for compacting the range [begin,end] in @@ -72,7 +73,8 @@ // *compaction_end should point to valid InternalKey!
virtual Compaction* CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, int input_level, int output_level, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, @@ -113,6 +115,7 @@ const std::vector<CompactionInputFiles>& input_files, int output_level, VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, uint32_t output_path_id); // Converts a set of compaction input file numbers into @@ -215,7 +218,7 @@ } protected: - const ImmutableCFOptions& ioptions_; + const ImmutableOptions& ioptions_; // A helper function to SanitizeCompactionInputFiles() that // sanitizes "input_files" by adding necessary files. @@ -241,7 +244,7 @@ // compaction. class NullCompactionPicker : public CompactionPicker { public: - NullCompactionPicker(const ImmutableCFOptions& ioptions, + NullCompactionPicker(const ImmutableOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual ~NullCompactionPicker() {} @@ -250,6 +253,7 @@ Compaction* PickCompaction( const std::string& /*cf_name*/, const MutableCFOptions& /*mutable_cf_options*/, + const MutableDBOptions& /*mutable_db_options*/, VersionStorageInfo* /*vstorage*/, LogBuffer* /* log_buffer */, SequenceNumber /* earliest_memtable_seqno */) override { return nullptr; @@ -258,6 +262,7 @@ // Always return "nullptr" Compaction* CompactRange(const std::string& /*cf_name*/, const MutableCFOptions& /*mutable_cf_options*/, + const MutableDBOptions& /*mutable_db_options*/, VersionStorageInfo* /*vstorage*/, int /*input_level*/, int /*output_level*/, const CompactRangeOptions& /*compact_range_options*/, @@ -305,9 +310,9 @@ int level, int base_level, const bool enable_compression = true); -CompressionOptions GetCompressionOptions(const ImmutableCFOptions& ioptions, - const VersionStorageInfo* vstorage, - int level, - const bool enable_compression = true); +CompressionOptions GetCompressionOptions( + const MutableCFOptions& mutable_cf_options, + const VersionStorageInfo* vstorage, int level, + const bool enable_compression = true); } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc 2025-05-19 16:14:27.000000000 +0000 @@ -13,8 +13,10 @@ #include #include #include + #include "db/column_family.h" #include "logging/log_buffer.h" +#include "logging/logging.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { @@ -36,7 +38,8 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer) { + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer) { assert(mutable_cf_options.ttl > 0); const int kLevel0 = 0; @@ -44,7 +47,7 @@ uint64_t total_size = GetTotalFilesSize(level_files); int64_t _current_time; - auto status = ioptions_.env->GetCurrentTime(&_current_time); + auto status =
ioptions_.clock->GetCurrentTime(&_current_time); if (!status.ok()) { ROCKS_LOG_BUFFER(log_buffer, "[%s] FIFO compaction: Couldn't get current time: %s. " @@ -70,18 +73,18 @@ // avoid underflow if (current_time > mutable_cf_options.ttl) { for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) { - auto f = *ritr; - if (f->fd.table_reader != nullptr && - f->fd.table_reader->GetTableProperties() != nullptr) { - auto creation_time = + FileMetaData* f = *ritr; + assert(f); + if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + uint64_t creation_time = f->fd.table_reader->GetTableProperties()->creation_time; if (creation_time == 0 || creation_time >= (current_time - mutable_cf_options.ttl)) { break; } - total_size -= f->compensated_file_size; - inputs[0].files.push_back(f); } + total_size -= f->compensated_file_size; + inputs[0].files.push_back(f); } } @@ -96,24 +99,31 @@ } for (const auto& f : inputs[0].files) { + uint64_t creation_time = 0; + assert(f); + if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + creation_time = f->fd.table_reader->GetTableProperties()->creation_time; + } ROCKS_LOG_BUFFER(log_buffer, "[%s] FIFO compaction: picking file %" PRIu64 " with creation time %" PRIu64 " for deletion", - cf_name.c_str(), f->fd.GetNumber(), - f->fd.table_reader->GetTableProperties()->creation_time); + cf_name.c_str(), f->fd.GetNumber(), creation_time); } Compaction* c = new Compaction( - vstorage, ioptions_, mutable_cf_options, std::move(inputs), 0, 0, 0, 0, - kNoCompression, ioptions_.compression_opts, /* max_subcompactions */ 0, - {}, /* is manual */ false, vstorage->CompactionScore(0), + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(inputs), 0, 0, 0, 0, kNoCompression, + mutable_cf_options.compression_opts, Temperature::kUnknown, + /* max_subcompactions */ 0, {}, /* is manual */ false, + vstorage->CompactionScore(0), /* is deletion compaction */ true, CompactionReason::kFIFOTtl); return c; } Compaction* FIFOCompactionPicker::PickSizeCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer) { + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer) { const int kLevel0 = 0; const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0); uint64_t total_size = GetTotalFilesSize(level_files); @@ -142,11 +152,12 @@ max_compact_bytes_per_del_file, mutable_cf_options.max_compaction_bytes, &comp_inputs)) { Compaction* c = new Compaction( - vstorage, ioptions_, mutable_cf_options, {comp_inputs}, 0, - 16 * 1024 * 1024 /* output file size limit */, + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + {comp_inputs}, 0, 16 * 1024 * 1024 /* output file size limit */, 0 /* max compaction bytes, not applicable */, 0 /* output path ID */, mutable_cf_options.compression, - ioptions_.compression_opts, 0 /* max_subcompactions */, {}, + mutable_cf_options.compression_opts, Temperature::kUnknown, + 0 /* max_subcompactions */, {}, /* is manual */ false, vstorage->CompactionScore(0), /* is deletion compaction */ false, CompactionReason::kFIFOReduceNumFiles); @@ -193,25 +204,139 @@ } Compaction* c = new Compaction( - vstorage, ioptions_, mutable_cf_options, std::move(inputs), 0, 0, 0, 0, - kNoCompression, ioptions_.compression_opts, /* max_subcompactions */ 0, - {}, /* is manual */ false, vstorage->CompactionScore(0), + vstorage, ioptions_, mutable_cf_options, mutable_db_options, +
std::move(inputs), 0, 0, 0, 0, kNoCompression, + mutable_cf_options.compression_opts, Temperature::kUnknown, + /* max_subcompactions */ 0, {}, /* is manual */ false, + vstorage->CompactionScore(0), /* is deletion compaction */ true, CompactionReason::kFIFOMaxSize); return c; } +Compaction* FIFOCompactionPicker::PickCompactionToWarm( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer) { + if (mutable_cf_options.compaction_options_fifo.age_for_warm == 0) { + return nullptr; + } + + const int kLevel0 = 0; + const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0); + + int64_t _current_time; + auto status = ioptions_.clock->GetCurrentTime(&_current_time); + if (!status.ok()) { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] FIFO compaction: Couldn't get current time: %s. " + "Not doing compactions based on warm threshold. ", + cf_name.c_str(), status.ToString().c_str()); + return nullptr; + } + const uint64_t current_time = static_cast<uint64_t>(_current_time); + + if (!level0_compactions_in_progress_.empty()) { + ROCKS_LOG_BUFFER( + log_buffer, + "[%s] FIFO compaction: Already executing compaction. Parallel " + "compactions are not supported", + cf_name.c_str()); + return nullptr; + } + + std::vector<CompactionInputFiles> inputs; + inputs.emplace_back(); + inputs[0].level = 0; + + // avoid underflow + if (current_time > mutable_cf_options.compaction_options_fifo.age_for_warm) { + uint64_t create_time_threshold = + current_time - mutable_cf_options.compaction_options_fifo.age_for_warm; + uint64_t compaction_size = 0; + // We will ideally identify a file qualifying for warm tier by knowing + // the timestamp for the youngest entry in the file. However, right now + // we don't have the information. We infer it by looking at timestamp + // of the next file's (which is just younger) oldest entry's timestamp. + FileMetaData* prev_file = nullptr; + for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) { + FileMetaData* f = *ritr; + assert(f); + if (f->being_compacted) { + // Right now this probably won't happen as we never try to schedule + // two compactions in parallel, so here we just simply don't schedule + // anything. + return nullptr; + } + uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime(); + if (oldest_ancester_time == kUnknownOldestAncesterTime) { + // Older files might not have enough information. It is possible to + // handle these files by looking at newer files, but maintaining the + // logic isn't worth it. + break; + } + if (oldest_ancester_time > create_time_threshold) { + // The previous file (which has slightly older data) doesn't qualify + // for warm tier. + break; + } + if (prev_file != nullptr) { + compaction_size += prev_file->fd.GetFileSize(); + if (compaction_size > mutable_cf_options.max_compaction_bytes) { + break; + } + inputs[0].files.push_back(prev_file); + ROCKS_LOG_BUFFER(log_buffer, + "[%s] FIFO compaction: picking file %" PRIu64 + " with next file's oldest time %" PRIu64 " for warm", + cf_name.c_str(), prev_file->fd.GetNumber(), + oldest_ancester_time); + } + if (f->temperature == Temperature::kUnknown || + f->temperature == Temperature::kHot) { + prev_file = f; + } else if (!inputs[0].files.empty()) { + // A warm file newer than files picked.
+ break; + } else { + assert(prev_file == nullptr); + } + } + } + + if (inputs[0].files.empty()) { + return nullptr; + } + + Compaction* c = new Compaction( + vstorage, ioptions_, mutable_cf_options, mutable_db_options, + std::move(inputs), 0, 0 /* output file size limit */, + 0 /* max compaction bytes, not applicable */, 0 /* output path ID */, + mutable_cf_options.compression, mutable_cf_options.compression_opts, + Temperature::kWarm, + /* max_subcompactions */ 0, {}, /* is manual */ false, + vstorage->CompactionScore(0), + /* is deletion compaction */ false, CompactionReason::kChangeTemperature); + return c; +} + Compaction* FIFOCompactionPicker::PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer, - SequenceNumber /*earliest_memtable_seqno*/) { + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer, SequenceNumber /*earliest_memtable_seqno*/) { assert(vstorage->num_levels() == 1); Compaction* c = nullptr; if (mutable_cf_options.ttl > 0) { - c = PickTTLCompaction(cf_name, mutable_cf_options, vstorage, log_buffer); + c = PickTTLCompaction(cf_name, mutable_cf_options, mutable_db_options, + vstorage, log_buffer); + } + if (c == nullptr) { + c = PickSizeCompaction(cf_name, mutable_cf_options, mutable_db_options, + vstorage, log_buffer); } if (c == nullptr) { - c = PickSizeCompaction(cf_name, mutable_cf_options, vstorage, log_buffer); + c = PickCompactionToWarm(cf_name, mutable_cf_options, mutable_db_options, + vstorage, log_buffer); } RegisterCompaction(c); return c; @@ -219,7 +344,8 @@ Compaction* FIFOCompactionPicker::CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, int input_level, int output_level, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + int input_level, int output_level, const CompactRangeOptions& /*compact_range_options*/, const InternalKey* /*begin*/, const InternalKey* /*end*/, InternalKey** compaction_end, bool* /*manual_conflict*/, @@ -231,9 +357,9 @@ assert(input_level == 0); assert(output_level == 0); *compaction_end = nullptr; - LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, ioptions_.info_log); - Compaction* c = - PickCompaction(cf_name, mutable_cf_options, vstorage, &log_buffer); + LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, ioptions_.logger); + Compaction* c = PickCompaction(cf_name, mutable_cf_options, + mutable_db_options, vstorage, &log_buffer); log_buffer.FlushBufferToLog(); return c; } diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h 2025-05-19 16:14:27.000000000 +0000 @@ -15,18 +15,20 @@ namespace ROCKSDB_NAMESPACE { class FIFOCompactionPicker : public CompactionPicker { public: - FIFOCompactionPicker(const ImmutableCFOptions& ioptions, + FIFOCompactionPicker(const ImmutableOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* version, LogBuffer* log_buffer, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* version, + LogBuffer* 
log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) override; virtual Compaction* CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, int input_level, int output_level, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + int input_level, int output_level, const CompactRangeOptions& compact_range_options, const InternalKey* begin, const InternalKey* end, InternalKey** compaction_end, bool* manual_conflict, @@ -41,13 +43,21 @@ private: Compaction* PickTTLCompaction(const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* version, LogBuffer* log_buffer); Compaction* PickSizeCompaction(const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* version, LogBuffer* log_buffer); + + Compaction* PickCompactionToWarm(const std::string& cf_name, + const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, + VersionStorageInfo* version, + LogBuffer* log_buffer); }; } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc 2025-05-19 16:14:27.000000000 +0000 @@ -31,6 +31,9 @@ if (!vstorage->FilesMarkedForCompaction().empty()) { return true; } + if (!vstorage->FilesMarkedForForcedBlobGC().empty()) { + return true; + } for (int i = 0; i <= vstorage->MaxInputLevel(); i++) { if (vstorage->CompactionScore(i) >= 1) { return true; @@ -49,14 +52,16 @@ CompactionPicker* compaction_picker, LogBuffer* log_buffer, const MutableCFOptions& mutable_cf_options, - const ImmutableCFOptions& ioptions) + const ImmutableOptions& ioptions, + const MutableDBOptions& mutable_db_options) : cf_name_(cf_name), vstorage_(vstorage), earliest_mem_seqno_(earliest_mem_seqno), compaction_picker_(compaction_picker), log_buffer_(log_buffer), mutable_cf_options_(mutable_cf_options), - ioptions_(ioptions) {} + ioptions_(ioptions), + mutable_db_options_(mutable_db_options) {} // Pick and return a compaction. Compaction* PickCompaction(); @@ -93,9 +98,13 @@ // otherwise, returns false. bool PickIntraL0Compaction(); - void PickExpiredTtlFiles(); - - void PickFilesMarkedForPeriodicCompaction(); + // Picks a file from level_files to compact. + // level_files is a vector of (level, file metadata) in ascending order of + // level. If compact_to_next_level is true, compact the file to the next + // level, otherwise, compact to the same level as the input file. 
+ void PickFileToCompact( + const autovector<std::pair<int, FileMetaData*>>& level_files, + bool compact_to_next_level); const std::string& cf_name_; VersionStorageInfo* vstorage_; @@ -115,7 +124,8 @@ CompactionReason compaction_reason_ = CompactionReason::kUnknown; const MutableCFOptions& mutable_cf_options_; - const ImmutableCFOptions& ioptions_; + const ImmutableOptions& ioptions_; + const MutableDBOptions& mutable_db_options_; // Pick a path ID to place a newly generated file, with its level static uint32_t GetPathId(const ImmutableCFOptions& ioptions, const MutableCFOptions& mutable_cf_options, @@ -124,72 +134,34 @@ static const int kMinFilesForIntraL0Compaction = 4; }; -void LevelCompactionBuilder::PickExpiredTtlFiles() { - if (vstorage_->ExpiredTtlFiles().empty()) { - return; - } - - auto continuation = [&](std::pair<int, FileMetaData*> level_file) { +void LevelCompactionBuilder::PickFileToCompact( + const autovector<std::pair<int, FileMetaData*>>& level_files, + bool compact_to_next_level) { + for (auto& level_file : level_files) { // If it's being compacted it has nothing to do here. // If this assert() fails that means that some function marked some // files as being_compacted, but didn't call ComputeCompactionScore() assert(!level_file.second->being_compacted); start_level_ = level_file.first; - output_level_ = - (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; - - if ((start_level_ == vstorage_->num_non_empty_levels() - 1) || + if ((compact_to_next_level && + start_level_ == vstorage_->num_non_empty_levels() - 1) || (start_level_ == 0 && !compaction_picker_->level0_compactions_in_progress()->empty())) { - return false; - } - - start_level_inputs_.files = {level_file.second}; - start_level_inputs_.level = start_level_; - return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &start_level_inputs_); - }; - - for (auto& level_file : vstorage_->ExpiredTtlFiles()) { - if (continuation(level_file)) { - // found the compaction! - return; + continue; } - } - - start_level_inputs_.files.clear(); -} - -void LevelCompactionBuilder::PickFilesMarkedForPeriodicCompaction() { - if (vstorage_->FilesMarkedForPeriodicCompaction().empty()) { - return; - } - - auto continuation = [&](std::pair<int, FileMetaData*> level_file) { - // If it's being compacted it has nothing to do here. - // If this assert() fails that means that some function marked some - // files as being_compacted, but didn't call ComputeCompactionScore() - assert(!level_file.second->being_compacted); - output_level_ = start_level_ = level_file.first; - - if (start_level_ == 0 && - !compaction_picker_->level0_compactions_in_progress()->empty()) { - return false; + if (compact_to_next_level) { + output_level_ = + (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; + } else { + output_level_ = start_level_; } - start_level_inputs_.files = {level_file.second}; start_level_inputs_.level = start_level_; - return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &start_level_inputs_); - }; - - for (auto& level_file : vstorage_->FilesMarkedForPeriodicCompaction()) { - if (continuation(level_file)) { - // found the compaction! + if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &start_level_inputs_)) { return; } } - start_level_inputs_.files.clear(); } @@ -238,64 +210,53 @@ } } } + } else { + // Compaction scores are sorted in descending order, no further scores + // will be >= 1.
+ break; } } + if (!start_level_inputs_.empty()) { + return; + } // if we didn't find a compaction, check if there are any files marked for // compaction - if (start_level_inputs_.empty()) { - parent_index_ = base_index_ = -1; + parent_index_ = base_index_ = -1; - compaction_picker_->PickFilesMarkedForCompaction( - cf_name_, vstorage_, &start_level_, &output_level_, - &start_level_inputs_); - if (!start_level_inputs_.empty()) { - is_manual_ = true; - compaction_reason_ = CompactionReason::kFilesMarkedForCompaction; - return; - } + compaction_picker_->PickFilesMarkedForCompaction( + cf_name_, vstorage_, &start_level_, &output_level_, &start_level_inputs_); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kFilesMarkedForCompaction; + return; } // Bottommost Files Compaction on deleting tombstones - if (start_level_inputs_.empty()) { - size_t i; - for (i = 0; i < vstorage_->BottommostFilesMarkedForCompaction().size(); - ++i) { - auto& level_and_file = vstorage_->BottommostFilesMarkedForCompaction()[i]; - assert(!level_and_file.second->being_compacted); - start_level_inputs_.level = output_level_ = start_level_ = - level_and_file.first; - start_level_inputs_.files = {level_and_file.second}; - if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, - &start_level_inputs_)) { - break; - } - } - if (i == vstorage_->BottommostFilesMarkedForCompaction().size()) { - start_level_inputs_.clear(); - } else { - assert(!start_level_inputs_.empty()); - compaction_reason_ = CompactionReason::kBottommostFiles; - return; - } + PickFileToCompact(vstorage_->BottommostFilesMarkedForCompaction(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kBottommostFiles; + return; } // TTL Compaction - if (start_level_inputs_.empty()) { - PickExpiredTtlFiles(); - if (!start_level_inputs_.empty()) { - compaction_reason_ = CompactionReason::kTtl; - return; - } + PickFileToCompact(vstorage_->ExpiredTtlFiles(), true); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kTtl; + return; } // Periodic Compaction - if (start_level_inputs_.empty()) { - PickFilesMarkedForPeriodicCompaction(); - if (!start_level_inputs_.empty()) { - compaction_reason_ = CompactionReason::kPeriodicCompaction; - return; - } + PickFileToCompact(vstorage_->FilesMarkedForPeriodicCompaction(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kPeriodicCompaction; + return; + } + + // Forced blob garbage collection + PickFileToCompact(vstorage_->FilesMarkedForForcedBlobGC(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kForcedBlobGC; + return; } } @@ -375,8 +336,8 @@ Compaction* LevelCompactionBuilder::GetCompaction() { auto c = new Compaction( - vstorage_, ioptions_, mutable_cf_options_, std::move(compaction_inputs_), - output_level_, + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(compaction_inputs_), output_level_, MaxFileSizeForLevel(mutable_cf_options_, output_level_, ioptions_.compaction_style, vstorage_->base_level(), ioptions_.level_compaction_dynamic_level_bytes), @@ -384,7 +345,8 @@ GetPathId(ioptions_, mutable_cf_options_, output_level_), GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, output_level_, vstorage_->base_level()), - GetCompressionOptions(ioptions_, vstorage_, output_level_), + GetCompressionOptions(mutable_cf_options_, vstorage_, output_level_), + Temperature::kUnknown, /* max_subcompactions */ 0, 
std::move(grandparents_), is_manual_, start_level_score_, false /* deletion_compaction */, compaction_reason_); @@ -433,7 +395,7 @@ if (ioptions.level_compaction_dynamic_level_bytes) { // Currently, level_compaction_dynamic_level_bytes is ignored when // multiple db paths are specified. https://github.com/facebook/ - // rocksdb/blob/master/db/column_family.cc. + // rocksdb/blob/main/db/column_family.cc. // Still, adding this check to avoid accidentally using // max_bytes_for_level_multiplier_additional level_size = static_cast<uint64_t>( @@ -549,10 +511,11 @@ Compaction* LevelCompactionPicker::PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer, - SequenceNumber earliest_mem_seqno) { + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer, SequenceNumber earliest_mem_seqno) { LevelCompactionBuilder builder(cf_name, vstorage, earliest_mem_seqno, this, - log_buffer, mutable_cf_options, ioptions_); + log_buffer, mutable_cf_options, ioptions_, + mutable_db_options); return builder.PickCompaction(); } } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.h 2025-05-19 16:14:27.000000000 +0000 @@ -17,12 +17,13 @@ // for description of Leveled compaction. class LevelCompactionPicker : public CompactionPicker { public: - LevelCompactionPicker(const ImmutableCFOptions& ioptions, + LevelCompactionPicker(const ImmutableOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) override; virtual bool NeedsCompaction( diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -3,16 +3,15 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory).
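A recurring pattern across the picker hunks above, and the test-file hunks below, is that ImmutableCFOptions widens to ImmutableOptions and every PickCompaction/CompactRange entry point additionally receives the current MutableDBOptions. A standalone C++ sketch of the design point follows; the struct and field names here are hypothetical stand-ins, since the real options structs are far larger:

// Sketch only: handing each pick a snapshot of the mutable options means one
// compaction decision sees a single consistent configuration, even if the
// options are reconfigured (e.g. via SetDBOptions) between picks.
#include <cstdint>
#include <iostream>

struct ImmutableOpts { int num_levels = 7; };               // fixed at Open()
struct MutableDBOpts { uint64_t write_rate = 16 << 20; };   // hypothetical mutable knob

uint64_t PickBudget(const ImmutableOpts& io, const MutableDBOpts& mdo) {
  // Uses exactly one coherent (immutable, mutable-snapshot) pair per call.
  return mdo.write_rate / static_cast<uint64_t>(io.num_levels);
}

int main() {
  ImmutableOpts io;
  MutableDBOpts mdo;
  std::cout << PickBudget(io, mdo) << "\n";
  mdo.write_rate = 32 << 20;                  // a later reconfiguration...
  std::cout << PickBudget(io, mdo) << "\n";   // ...takes effect on the next pick
}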
- #include #include #include + #include "db/compaction/compaction.h" #include "db/compaction/compaction_picker_fifo.h" #include "db/compaction/compaction_picker_level.h" #include "db/compaction/compaction_picker_universal.h" - -#include "logging/logging.h" +#include "db/compaction/file_pri.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" @@ -31,8 +30,9 @@ const Comparator* ucmp_; InternalKeyComparator icmp_; Options options_; - ImmutableCFOptions ioptions_; + ImmutableOptions ioptions_; MutableCFOptions mutable_cf_options_; + MutableDBOptions mutable_db_options_; LevelCompactionPicker level_compaction_picker; std::string cf_name_; CountingLogger logger_; @@ -52,6 +52,7 @@ icmp_(ucmp_), ioptions_(options_), mutable_cf_options_(options_), + mutable_db_options_(), level_compaction_picker(ioptions_, &icmp_), cf_name_("dummy"), log_buffer_(InfoLogLevel::INFO_LEVEL, &logger_), @@ -78,8 +79,17 @@ vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_); } + // Create a new VersionStorageInfo object so we can add more files and then + // merge it with the existing VersionStorageInfo + void AddVersionStorage() { + temp_vstorage_.reset(new VersionStorageInfo( + &icmp_, ucmp_, options_.num_levels, ioptions_.compaction_style, + vstorage_.get(), false)); + } + void DeleteVersionStorage() { vstorage_.reset(); + temp_vstorage_.reset(); files_.clear(); file_map_.clear(); input_files_.clear(); @@ -88,18 +98,28 @@ void Add(int level, uint32_t file_number, const char* smallest, const char* largest, uint64_t file_size = 1, uint32_t path_id = 0, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100, - size_t compensated_file_size = 0) { - assert(level < vstorage_->num_levels()); + size_t compensated_file_size = 0, bool marked_for_compact = false, + Temperature temperature = Temperature::kUnknown, + uint64_t oldest_ancestor_time = kUnknownOldestAncesterTime) { + VersionStorageInfo* vstorage; + if (temp_vstorage_) { + vstorage = temp_vstorage_.get(); + } else { + vstorage = vstorage_.get(); + } + assert(level < vstorage->num_levels()); FileMetaData* f = new FileMetaData( file_number, path_id, file_size, InternalKey(smallest, smallest_seq, kTypeValue), InternalKey(largest, largest_seq, kTypeValue), smallest_seq, - largest_seq, /* marked_for_compact */ false, kInvalidBlobFileNumber, + largest_seq, marked_for_compact, temperature, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); f->compensated_file_size = (compensated_file_size != 0) ?
compensated_file_size : file_size; - vstorage_->AddFile(level, f); + f->oldest_ancester_time = oldest_ancestor_time; + vstorage->AddFile(level, f); files_.emplace_back(f); file_map_.insert({file_number, {f, level}}); } @@ -122,8 +142,14 @@ } void UpdateVersionStorageInfo() { + if (temp_vstorage_) { + VersionBuilder builder(FileOptions(), &ioptions_, nullptr, + vstorage_.get(), nullptr); + ASSERT_OK(builder.SaveTo(temp_vstorage_.get())); + vstorage_ = std::move(temp_vstorage_); + } vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_); - vstorage_->UpdateFilesByCompactionPri(ioptions_.compaction_pri); + vstorage_->UpdateFilesByCompactionPri(ioptions_, mutable_cf_options_); vstorage_->UpdateNumNonEmptyLevels(); vstorage_->GenerateFileIndexer(); vstorage_->GenerateLevelFilesBrief(); @@ -132,13 +158,36 @@ vstorage_->ComputeFilesMarkedForCompaction(); vstorage_->SetFinalized(); } + void AddFileToVersionStorage(int level, uint32_t file_number, + const char* smallest, const char* largest, + uint64_t file_size = 1, uint32_t path_id = 0, + SequenceNumber smallest_seq = 100, + SequenceNumber largest_seq = 100, + size_t compensated_file_size = 0, + bool marked_for_compact = false) { + VersionStorageInfo* base_vstorage = vstorage_.release(); + vstorage_.reset(new VersionStorageInfo(&icmp_, ucmp_, options_.num_levels, + kCompactionStyleUniversal, + base_vstorage, false)); + Add(level, file_number, smallest, largest, file_size, path_id, smallest_seq, + largest_seq, compensated_file_size, marked_for_compact); + + VersionBuilder builder(FileOptions(), &ioptions_, nullptr, base_vstorage, + nullptr); + builder.SaveTo(vstorage_.get()); + UpdateVersionStorageInfo(); + } + + private: + std::unique_ptr<VersionStorageInfo> temp_vstorage_; }; TEST_F(CompactionPickerTest, Empty) { NewVersionStorage(6, kCompactionStyleLevel); UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); } @@ -149,7 +198,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); } @@ -162,7 +212,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); @@ -175,7 +226,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(66U, compaction->input(0, 0)->fd.GetNumber()); @@ -193,7 +245,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(2U, compaction->num_input_files(1)); @@ -224,7 +277,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber()); @@ -271,7 +325,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); @@ -295,7 +350,8 @@ ASSERT_EQ(vstorage_->base_level(), num_levels - 2); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); @@ -320,7 +376,8 @@ ASSERT_EQ(vstorage_->base_level(), num_levels - 3); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); @@ -349,7 +406,8 @@ ASSERT_EQ(vstorage_->base_level(), num_levels - 3); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_files(0)); ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); @@ -371,8 +429,8 @@ mutable_cf_options_.max_bytes_for_level_multiplier = 10; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); - Add(num_levels - 1, 3U, "200", "250", 300U); - Add(num_levels - 1, 4U, "300", "350", 3000U); + Add(num_levels - 1, 2U, "200", "250", 300U); + Add(num_levels - 1, 3U, "300", "350", 3000U); Add(num_levels - 1, 4U, "400", "450", 3U); Add(num_levels - 2, 5U, "150", "180", 300U); Add(num_levels - 2, 6U, "181", "350", 500U); @@ -381,7 +439,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); ASSERT_EQ(5U, compaction->input(0, 0)->fd.GetNumber()); @@ -438,7 +497,8 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); // output level should be the one above the bottom-most ASSERT_EQ(1, compaction->output_level()); @@ -472,7 +532,8 @@
std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(!compaction->is_trivial_move()); } @@ -498,7 +559,8 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction->is_trivial_move()); } @@ -526,7 +588,8 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction); ASSERT_EQ(4, compaction->output_level()); @@ -556,7 +619,8 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_FALSE(compaction); } @@ -582,14 +646,15 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_FALSE(compaction); } TEST_F(CompactionPickerTest, UniversalPeriodicCompaction4) { // The case where universal periodic compaction couldn't form - // a compaction that inlcudes any file marked for periodic compaction. + // a compaction that includes any file marked for periodic compaction. // Right now we form the compaction anyway if it is more than one // sorted run. Just put the case here to validate that it doesn't // crash.
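Nearly every hunk in this test diff applies the same upstream API change: PickCompaction() now takes a MutableDBOptions argument between the column-family options and the version storage. A minimal sketch of the call-site migration, assuming the fixture members used above (the condensed form is illustrative, not a line of this diff):

    // Before: only column-family-scoped options were passed in.
    std::unique_ptr<Compaction> c(level_compaction_picker.PickCompaction(
        cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
    // After: DB-wide mutable options are threaded through as well, so a
    // picker can consult settings that may be changed at runtime.
    std::unique_ptr<Compaction> c2(level_compaction_picker.PickCompaction(
        cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
        &log_buffer_));
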
@@ -612,7 +677,8 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(!compaction || compaction->start_level() != compaction->output_level()); } @@ -632,7 +698,8 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction); ASSERT_EQ(0, compaction->start_level()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -656,7 +723,8 @@ std::unique_ptr<Compaction> compaction( universal_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction); ASSERT_EQ(4, compaction->start_level()); ASSERT_EQ(2U, compaction->num_input_files(0)); @@ -665,6 +733,221 @@ ASSERT_EQ(4, compaction->output_level()); } +TEST_F(CompactionPickerTest, UniversalIncrementalSpace1) { + const uint64_t kFileSize = 100000; + + mutable_cf_options_.max_compaction_bytes = 555555; + mutable_cf_options_.compaction_options_universal.incremental = true; + mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent = 30; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + NewVersionStorage(5, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(2, 2U, "010", "080", kFileSize, 0, 200, 251); + Add(3, 5U, "310", "380", kFileSize, 0, 200, 251); + Add(3, 6U, "410", "880", kFileSize, 0, 200, 251); + Add(3, 7U, "910", "980", 1, 0, 200, 251); + Add(4, 10U, "201", "250", kFileSize, 0, 101, 150); + Add(4, 11U, "301", "350", kFileSize, 0, 101, 150); + Add(4, 12U, "401", "450", kFileSize, 0, 101, 150); + Add(4, 13U, "501", "750", kFileSize, 0, 101, 150); + Add(4, 14U, "801", "850", kFileSize, 0, 101, 150); + Add(4, 15U, "901", "950", kFileSize, 0, 101, 150); + // Add(4, 15U, "960", "970", kFileSize, 0, 101, 150); + + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction); + ASSERT_EQ(4, compaction->output_level()); + ASSERT_EQ(3, compaction->start_level()); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(5U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(6U, compaction->input(0, 1)->fd.GetNumber()); + // ASSERT_EQ(4U, compaction->num_input_files(1)); + ASSERT_EQ(11U, compaction->input(1, 0)->fd.GetNumber()); + ASSERT_EQ(12U, compaction->input(1, 1)->fd.GetNumber()); + ASSERT_EQ(13U, compaction->input(1, 2)->fd.GetNumber()); + ASSERT_EQ(14U, compaction->input(1, 3)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, UniversalIncrementalSpace2) { + const uint64_t kFileSize = 100000; + + mutable_cf_options_.max_compaction_bytes = 400000; + mutable_cf_options_.compaction_options_universal.incremental = true; + mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent = 30; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + NewVersionStorage(5, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(1, 2U, "010", "080", kFileSize, 0, 200, 251); + Add(2, 5U, "310", "380", kFileSize, 0,
200, 251); + Add(2, 6U, "410", "880", kFileSize, 0, 200, 251); + Add(2, 7U, "910", "980", kFileSize, 0, 200, 251); + Add(4, 10U, "201", "250", kFileSize, 0, 101, 150); + Add(4, 11U, "301", "350", kFileSize, 0, 101, 150); + Add(4, 12U, "401", "450", kFileSize, 0, 101, 150); + Add(4, 13U, "501", "750", kFileSize, 0, 101, 150); + Add(4, 14U, "801", "850", kFileSize, 0, 101, 150); + Add(4, 15U, "901", "950", kFileSize, 0, 101, 150); + + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction); + ASSERT_EQ(4, compaction->output_level()); + ASSERT_EQ(2, compaction->start_level()); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(1U, compaction->num_input_files(1)); + ASSERT_EQ(15U, compaction->input(1, 0)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, UniversalIncrementalSpace3) { + // Test bottom level files falling between gaps between two upper level + // files + const uint64_t kFileSize = 100000; + + mutable_cf_options_.max_compaction_bytes = 300000; + mutable_cf_options_.compaction_options_universal.incremental = true; + mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent = 30; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + NewVersionStorage(5, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(2, 2U, "010", "080", kFileSize, 0, 200, 251); + Add(3, 5U, "000", "180", kFileSize, 0, 200, 251); + Add(3, 6U, "181", "190", kFileSize, 0, 200, 251); + Add(3, 7U, "710", "810", kFileSize, 0, 200, 251); + Add(3, 8U, "820", "830", kFileSize, 0, 200, 251); + Add(3, 9U, "900", "991", kFileSize, 0, 200, 251); + Add(4, 10U, "201", "250", kFileSize, 0, 101, 150); + Add(4, 11U, "301", "350", kFileSize, 0, 101, 150); + Add(4, 12U, "401", "450", kFileSize, 0, 101, 150); + Add(4, 13U, "501", "750", kFileSize, 0, 101, 150); + Add(4, 14U, "801", "850", kFileSize, 0, 101, 150); + Add(4, 15U, "901", "950", kFileSize, 0, 101, 150); + + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction); + ASSERT_EQ(4, compaction->output_level()); + ASSERT_EQ(2, compaction->start_level()); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(2U, compaction->num_input_files(1)); + ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber()); + ASSERT_EQ(6U, compaction->input(1, 1)->fd.GetNumber()); + ASSERT_EQ(0, compaction->num_input_files(2)); +} + +TEST_F(CompactionPickerTest, UniversalIncrementalSpace4) { + // Test compaction candidates always cover many files. + const uint64_t kFileSize = 100000; + + mutable_cf_options_.max_compaction_bytes = 3200000; + mutable_cf_options_.compaction_options_universal.incremental = true; + mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent = 30; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + NewVersionStorage(5, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(2, 2U, "010", "080", kFileSize, 0, 200, 251); + + // Generate files like following: + // L3: (1101, 1180) (1201, 1280) ...
(7901, 7908) + // L4: (1130, 1150) (1160, 1210) (1230, 1250) (1260 1310) ... (7960, 8010) + for (int i = 11; i < 79; i++) { + Add(3, 100 + i * 3, ToString(i * 100).c_str(), + ToString(i * 100 + 80).c_str(), kFileSize, 0, 200, 251); + // Add a tie breaker + if (i == 66) { + Add(3, 10000U, "6690", "6699", kFileSize, 0, 200, 251); + } + + Add(4, 100 + i * 3 + 1, ToString(i * 100 + 30).c_str(), + ToString(i * 100 + 50).c_str(), kFileSize, 0, 200, 251); + Add(4, 100 + i * 3 + 2, ToString(i * 100 + 60).c_str(), + ToString(i * 100 + 110).c_str(), kFileSize, 0, 200, 251); + } + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction); + ASSERT_EQ(4, compaction->output_level()); + ASSERT_EQ(3, compaction->start_level()); + ASSERT_EQ(6U, compaction->num_input_files(0)); + ASSERT_EQ(100 + 62U * 3, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(10000U, compaction->input(0, 5)->fd.GetNumber()); + ASSERT_EQ(11, compaction->num_input_files(1)); +} + +TEST_F(CompactionPickerTest, UniversalIncrementalSpace5) { + // Test compaction candidates always cover many files with some single + // files larger than size threshold. + const uint64_t kFileSize = 100000; + + mutable_cf_options_.max_compaction_bytes = 3200000; + mutable_cf_options_.compaction_options_universal.incremental = true; + mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent = 30; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + NewVersionStorage(5, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(2, 2U, "010", "080", kFileSize, 0, 200, 251); + + // Generate files like following: + // L3: (1101, 1180) (1201, 1280) ... (7901, 7908) + // L4: (1130, 1150) (1160, 1210) (1230, 1250) (1260 1310) ... (7960, 8010) + for (int i = 11; i < 70; i++) { + Add(3, 100 + i * 3, ToString(i * 100).c_str(), + ToString(i * 100 + 80).c_str(), + i % 10 == 9 ? kFileSize * 100 : kFileSize, 0, 200, 251); + + Add(4, 100 + i * 3 + 1, ToString(i * 100 + 30).c_str(), + ToString(i * 100 + 50).c_str(), kFileSize, 0, 200, 251); + Add(4, 100 + i * 3 + 2, ToString(i * 100 + 60).c_str(), + ToString(i * 100 + 110).c_str(), kFileSize, 0, 200, 251); + } + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction); + ASSERT_EQ(4, compaction->output_level()); + ASSERT_EQ(3, compaction->start_level()); + ASSERT_EQ(6U, compaction->num_input_files(0)); + ASSERT_EQ(100 + 14 * 3, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(100 + 19 * 3, compaction->input(0, 5)->fd.GetNumber()); + ASSERT_EQ(13, compaction->num_input_files(1)); +} + TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { NewVersionStorage(1, kCompactionStyleFIFO); const int kFileCount = @@ -681,18 +964,255 @@ // verify whether compaction is needed based on the current // size of L0 files.
- uint64_t current_size = 0; for (int i = 1; i <= kFileCount; ++i) { NewVersionStorage(1, kCompactionStyleFIFO); Add(0, i, ToString((i + 100) * 1000).c_str(), - ToString((i + 100) * 1000 + 999).c_str(), - kFileSize, 0, i * 100, i * 100 + 99); - current_size += kFileSize; + ToString((i + 100) * 1000 + 999).c_str(), kFileSize, 0, i * 100, + i * 100 + 99); UpdateVersionStorageInfo(); ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), vstorage_->CompactionScore(0) >= 1); } } + +TEST_F(CompactionPickerTest, FIFOToWarm1) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 2000; + + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.age_for_warm = kWarmThreshold; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(&current_time)); + uint64_t threshold_time = + static_cast<uint64_t>(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, static_cast<uint64_t>(current_time) - 100); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, threshold_time + 100); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, threshold_time - 2000); + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, threshold_time - 3000); + UpdateVersionStorageInfo(); + + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr<Compaction> compaction(fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(3U, compaction->input(0, 0)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, FIFOToWarm2) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 2000; + + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.age_for_warm = kWarmThreshold; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(&current_time)); + uint64_t threshold_time = + static_cast<uint64_t>(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, static_cast<uint64_t>(current_time) - 100); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, threshold_time + 100); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, threshold_time - 2000); + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, threshold_time - 3000); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, threshold_time - 4000); + UpdateVersionStorageInfo(); + + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr<Compaction>
compaction(fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(3U, compaction->input(0, 1)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, FIFOToWarmMaxSize) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 2000; + + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.age_for_warm = kWarmThreshold; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.max_compaction_bytes = kFileSize * 9; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(&current_time)); + uint64_t threshold_time = + static_cast<uint64_t>(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, static_cast<uint64_t>(current_time) - 100); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, threshold_time + 100); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, threshold_time - 2000); + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, threshold_time - 3000); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, threshold_time - 4000); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kUnknown, threshold_time - 5000); + UpdateVersionStorageInfo(); + + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr<Compaction> compaction(fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, FIFOToWarmWithExistingWarm) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 2000; + + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.age_for_warm = kWarmThreshold; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(&current_time)); + uint64_t threshold_time = + static_cast<uint64_t>(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, static_cast<uint64_t>(current_time) - 100); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, threshold_time + 100); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, threshold_time - 2000); + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, threshold_time - 3000); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown,
threshold_time - 4000); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kWarm, threshold_time - 5000); + UpdateVersionStorageInfo(); + + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr<Compaction> compaction(fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(3U, compaction->input(0, 1)->fd.GetNumber()); +} + +TEST_F(CompactionPickerTest, FIFOToWarmWithOngoing) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 2000; + + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.age_for_warm = kWarmThreshold; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(&current_time)); + uint64_t threshold_time = + static_cast<uint64_t>(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, static_cast<uint64_t>(current_time) - 100); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, threshold_time + 100); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, threshold_time - 2000); + Add(0, 3U, "200", "300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kUnknown, threshold_time - 3000); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, threshold_time - 4000); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kWarm, threshold_time - 5000); + file_map_[2].first->being_compacted = true; + UpdateVersionStorageInfo(); + + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr<Compaction> compaction(fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + // Stop if a file is being compacted + ASSERT_TRUE(compaction.get() == nullptr); +} + +TEST_F(CompactionPickerTest, FIFOToWarmWithHotBetweenWarms) { + NewVersionStorage(1, kCompactionStyleFIFO); + const uint64_t kFileSize = 100000; + const uint64_t kMaxSize = kFileSize * 100000; + uint64_t kWarmThreshold = 2000; + + fifo_options_.max_table_files_size = kMaxSize; + fifo_options_.age_for_warm = kWarmThreshold; + mutable_cf_options_.compaction_options_fifo = fifo_options_; + mutable_cf_options_.level0_file_num_compaction_trigger = 2; + mutable_cf_options_.max_compaction_bytes = kFileSize * 100; + FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_); + + int64_t current_time = 0; + ASSERT_OK(Env::Default()->GetCurrentTime(&current_time)); + uint64_t threshold_time = + static_cast<uint64_t>(current_time) - kWarmThreshold; + Add(0, 6U, "240", "290", 2 * kFileSize, 0, 2900, 3000, 0, true, + Temperature::kUnknown, static_cast<uint64_t>(current_time) - 100); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 2700, 2800, 0, true, + Temperature::kUnknown, threshold_time + 100); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 2500, 2600, 0, true, + Temperature::kUnknown, threshold_time - 2000); + Add(0, 3U, "200",
"300", 4 * kFileSize, 0, 2300, 2400, 0, true, + Temperature::kWarm, threshold_time - 3000); + Add(0, 2U, "200", "300", 4 * kFileSize, 0, 2100, 2200, 0, true, + Temperature::kUnknown, threshold_time - 4000); + Add(0, 1U, "200", "300", 4 * kFileSize, 0, 2000, 2100, 0, true, + Temperature::kWarm, threshold_time - 5000); + UpdateVersionStorageInfo(); + + ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), true); + std::unique_ptr compaction(fifo_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + // Stop if a file is being compacted + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber()); +} + #endif // ROCKSDB_LITE TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) { @@ -716,7 +1236,8 @@ UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Pick file 8 because it overlaps with 0 files on level 3. @@ -735,11 +1256,11 @@ Add(2, 6U, "150", "175", 60000000U); // Overlaps with file 26, 27, total size 521M Add(2, 7U, "176", "200", 60000000U); // Overlaps with file 27, 28, total size - // 520M, the smalelst overlapping + // 520M, the smallest overlapping Add(2, 8U, "201", "300", 60000000U); // Overlaps with file 28, 29, total size 521M - Add(3, 26U, "100", "110", 261000000U); + Add(3, 25U, "100", "110", 261000000U); Add(3, 26U, "150", "170", 261000000U); Add(3, 27U, "171", "179", 260000000U); Add(3, 28U, "191", "220", 260000000U); @@ -748,7 +1269,8 @@ UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Picking file 7 because overlapping ratio is the biggest. @@ -775,7 +1297,8 @@ UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Picking file 8 because overlapping ratio is the biggest. @@ -804,7 +1327,8 @@ UpdateVersionStorageInfo(); std::unique_ptr compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); // Picking file 8 because overlapping ratio is the biggest. 
@@ -831,7 +1355,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); } // This test checks ExpandWhileOverlapping() by having overlapping user keys @@ -848,7 +1373,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(2U, compaction->num_input_files(0)); @@ -867,7 +1393,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(2U, compaction->num_input_files(0)); @@ -894,7 +1421,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(5U, compaction->num_input_files(0)); @@ -924,7 +1452,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -947,7 +1476,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); } @@ -968,7 +1498,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -988,7 +1519,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_GE(1U, compaction->num_input_files(0)); @@ -1016,7 +1548,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(3U, compaction->num_input_files(0)); @@ -1048,7 +1581,8 @@
UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(5U, compaction->num_input_files(0)); @@ -1088,7 +1622,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -1126,7 +1661,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -1135,6 +1671,66 @@ ASSERT_EQ(7U, compaction->input(1, 0)->fd.GetNumber()); } +TEST_F(CompactionPickerTest, FileTtlBooster) { + // Set TTL to 2048 + // TTL boosting for all levels starts at 1024, + // Whole TTL range is 2048 * 31 / 32 - 1024 = 1984 - 1024 = 960. + // From second last level (L5), range starts at + // 1024 + 480, 1024 + 240, 1024 + 120 (which is L3). + // Boosting step 120 / 16 = 7.5 -> 7 + // + const uint64_t kCurrentTime = 1000000; + FileMetaData meta; + + { + FileTtlBooster booster(kCurrentTime, 2048, 7, 3); + + // Not triggering if the file is younger than ttl/2 + meta.oldest_ancester_time = kCurrentTime - 1023; + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + meta.oldest_ancester_time = kCurrentTime - 1024; + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + meta.oldest_ancester_time = kCurrentTime + 10; + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + + // Within one boosting step + meta.oldest_ancester_time = kCurrentTime - (1024 + 120 + 6); + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + + // One boosting step + meta.oldest_ancester_time = kCurrentTime - (1024 + 120 + 7); + ASSERT_EQ(2, booster.GetBoostScore(&meta)); + meta.oldest_ancester_time = kCurrentTime - (1024 + 120 + 8); + ASSERT_EQ(2, booster.GetBoostScore(&meta)); + + // Multiple boosting steps + meta.oldest_ancester_time = kCurrentTime - (1024 + 120 + 30); + ASSERT_EQ(5, booster.GetBoostScore(&meta)); + + // Very high boosting steps + meta.oldest_ancester_time = kCurrentTime - (1024 + 120 + 700); + ASSERT_EQ(101, booster.GetBoostScore(&meta)); + } + { + // Test second last level + FileTtlBooster booster(kCurrentTime, 2048, 7, 5); + meta.oldest_ancester_time = kCurrentTime - (1024 + 480); + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + meta.oldest_ancester_time = kCurrentTime - (1024 + 480 + 60); + ASSERT_EQ(3, booster.GetBoostScore(&meta)); + } + { + // Test last level + FileTtlBooster booster(kCurrentTime, 2048, 7, 6); + meta.oldest_ancester_time = kCurrentTime - (1024 + 480); + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + meta.oldest_ancester_time = kCurrentTime - (1024 + 480 + 60); + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + meta.oldest_ancester_time = kCurrentTime - 3000; + ASSERT_EQ(1, booster.GetBoostScore(&meta)); + } +} + TEST_F(CompactionPickerTest, NotScheduleL1IfL0WithHigherPri1) { NewVersionStorage(6,
kCompactionStyleLevel); mutable_cf_options_.level0_file_num_compaction_trigger = 2; @@ -1148,7 +1744,7 @@ Add(0, 32U, "001", "400", 1000000000U, 0, 0); Add(0, 33U, "001", "400", 1000000000U, 0, 0); - // L1 total size 2GB, score 2.2. If one file being comapcted, score 1.1. + // L1 total size 2GB, score 2.2. If one file being compacted, score 1.1. Add(1, 4U, "050", "300", 1000000000U, 0, 0); file_map_[4u].first->being_compacted = true; Add(1, 5U, "301", "350", 1000000000U, 0, 0); @@ -1163,7 +1759,8 @@ ASSERT_EQ(0, vstorage_->CompactionScoreLevel(0)); ASSERT_EQ(1, vstorage_->CompactionScoreLevel(1)); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); } @@ -1180,7 +1777,7 @@ Add(0, 32U, "001", "400", 1000000000U, 0, 0); Add(0, 33U, "001", "400", 1000000000U, 0, 0); - // L1 total size 2GB, score 2.2. If one file being comapcted, score 1.1. + // L1 total size 2GB, score 2.2. If one file being compacted, score 1.1. Add(1, 4U, "050", "300", 1000000000U, 0, 0); Add(1, 5U, "301", "350", 1000000000U, 0, 0); @@ -1193,7 +1790,8 @@ ASSERT_EQ(0, vstorage_->CompactionScoreLevel(0)); ASSERT_EQ(1, vstorage_->CompactionScoreLevel(1)); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); } @@ -1226,7 +1824,8 @@ ASSERT_EQ(1, vstorage_->CompactionScoreLevel(0)); ASSERT_EQ(0, vstorage_->CompactionScoreLevel(1)); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); } @@ -1255,7 +1854,7 @@ // Size ratio L4/L3 is 9.9 // After merge from L3, L4 size is 1000900 Add(4, 11U, "400", "500", 999900); - Add(5, 11U, "400", "500", 8007200); + Add(5, 12U, "400", "500", 8007200); UpdateVersionStorageInfo(); @@ -1520,7 +2119,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -1544,7 +2144,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(2U, compaction->num_input_levels()); ASSERT_EQ(3U, compaction->num_input_files(0)); @@ -1568,16 +2169,43 @@ Add(3, 5U, "120", "130", 7000U); Add(3, 6U, "170", "180", 7000U); - Add(3, 5U, "220", "230", 7000U); - Add(3, 5U, "270", "280", 7000U); + Add(3, 7U, "220", "230", 7000U); + Add(3, 8U, "270", "280", 7000U); UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr); ASSERT_TRUE(compaction->IsTrivialMove()); } +TEST_F(CompactionPickerTest, IsTrivialMoveOffSstPartitioned) { + mutable_cf_options_.max_bytes_for_level_base = 10000u; + mutable_cf_options_.max_compaction_bytes = 10001u; + ioptions_.level_compaction_dynamic_level_bytes = false; + ioptions_.sst_partitioner_factory = NewSstPartitionerFixedPrefixFactory(1); + NewVersionStorage(6, kCompactionStyleLevel); + // A compaction should be triggered and pick file 2 + Add(1, 1U, "100", "150", 3000U); + Add(1, 2U, "151", "200", 3001U); + Add(1, 3U, "201", "250", 3000U); + Add(1, 4U, "251", "300", 3000U); + + Add(3, 5U, "120", "130", 7000U); + Add(3, 6U, "170", "180", 7000U); + Add(3, 7U, "220", "230", 7000U); + Add(3, 8U, "270", "280", 7000U); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + // No trivial move, because partitioning is applied + ASSERT_TRUE(!compaction->IsTrivialMove()); +} + TEST_F(CompactionPickerTest, IsTrivialMoveOff) { mutable_cf_options_.max_bytes_for_level_base = 1000000u; mutable_cf_options_.max_compaction_bytes = 10000u; @@ -1594,7 +2222,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_FALSE(compaction->IsTrivialMove()); } @@ -1619,7 +2248,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -1628,7 +2258,8 @@ ASSERT_EQ(2, vstorage_->NextCompactionIndex(1 /* level */)); compaction.reset(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(1U, compaction->num_input_files(0)); @@ -1637,7 +2268,8 @@ ASSERT_EQ(3, vstorage_->NextCompactionIndex(1 /* level */)); compaction.reset(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() == nullptr); ASSERT_EQ(4, vstorage_->NextCompactionIndex(1 /* level */)); } @@ -1662,7 +2294,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(5U, compaction->num_input_files(0)); @@ -1692,7 +2325,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(4U, compaction->num_input_files(0)); @@ -1724,7 +2358,8 @@ UpdateVersionStorageInfo(); std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction( - cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_, 107)); + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_, 107)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(4U, compaction->num_input_files(0)); @@ -1733,6 +2368,336 @@ ASSERT_EQ(0, compaction->output_level()); } +#ifndef ROCKSDB_LITE +TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) { + const uint64_t kFileSize = 100000; + + ioptions_.compaction_style = kCompactionStyleUniversal; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + // This test covers the case where a "regular" universal compaction is + // scheduled first, followed by a delete triggered compaction. The latter + // should fail + NewVersionStorage(5, kCompactionStyleUniversal); + + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450); + Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300); + Add(3, 5U, "010", "080", 8 * kFileSize, 0, 200, 251); + Add(4, 3U, "301", "350", 8 * kFileSize, 0, 101, 150); + Add(4, 6U, "501", "750", 8 * kFileSize, 0, 101, 150); + + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + + ASSERT_TRUE(compaction); + // Validate that its a compaction to reduce sorted runs + ASSERT_EQ(CompactionReason::kUniversalSortedRunNum, + compaction->compaction_reason()); + ASSERT_EQ(0, compaction->output_level()); + ASSERT_EQ(0, compaction->start_level()); + ASSERT_EQ(2U, compaction->num_input_files(0)); + + AddVersionStorage(); + // Simulate a flush and mark the file for compaction + Add(0, 7U, "150", "200", kFileSize, 0, 551, 600, 0, true); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction2( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_FALSE(compaction2); +} + +TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap2) { + const uint64_t kFileSize = 100000; + + ioptions_.compaction_style = kCompactionStyleUniversal; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + // This test covers the case where a delete triggered compaction is + // scheduled first, followed by a "regular" compaction.
The latter + should fail + NewVersionStorage(5, kCompactionStyleUniversal); + + // Mark file number 4 for compaction + Add(0, 4U, "260", "300", 4 * kFileSize, 0, 260, 300, 0, true); + Add(3, 5U, "240", "290", 8 * kFileSize, 0, 201, 250); + Add(4, 3U, "301", "350", 8 * kFileSize, 0, 101, 150); + Add(4, 6U, "501", "750", 8 * kFileSize, 0, 101, 150); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + + ASSERT_TRUE(compaction); + // Validate that its a delete triggered compaction + ASSERT_EQ(CompactionReason::kFilesMarkedForCompaction, + compaction->compaction_reason()); + ASSERT_EQ(3, compaction->output_level()); + ASSERT_EQ(0, compaction->start_level()); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->num_input_files(1)); + + AddVersionStorage(); + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(0, 2U, "201", "250", 2 * kFileSize, 0, 401, 450); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction2( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_FALSE(compaction2); +} + +TEST_F(CompactionPickerTest, UniversalMarkedCompactionStartOutputOverlap) { + // The case where universal periodic compaction can be picked + // with some newer files being compacted. + const uint64_t kFileSize = 100000; + + ioptions_.compaction_style = kCompactionStyleUniversal; + + bool input_level_overlap = false; + bool output_level_overlap = false; + // Let's mark 2 files in 2 different levels for compaction. The + // compaction picker will randomly pick one, so use the sync point to + // ensure a deterministic order. Loop until both cases are covered + size_t random_index = 0; + SyncPoint::GetInstance()->SetCallBack( + "CompactionPicker::PickFilesMarkedForCompaction", [&](void* arg) { + size_t* index = static_cast<size_t*>(arg); + *index = random_index; + }); + SyncPoint::GetInstance()->EnableProcessing(); + while (!input_level_overlap || !output_level_overlap) { + // Ensure that the L0 file gets picked first + random_index = !input_level_overlap ? 0 : 1; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + NewVersionStorage(5, kCompactionStyleUniversal); + + Add(0, 1U, "260", "300", 4 * kFileSize, 0, 260, 300, 0, true); + Add(3, 2U, "010", "020", 2 * kFileSize, 0, 201, 248); + Add(3, 3U, "250", "270", 2 * kFileSize, 0, 202, 249); + Add(3, 4U, "290", "310", 2 * kFileSize, 0, 203, 250); + Add(3, 5U, "310", "320", 2 * kFileSize, 0, 204, 251, 0, true); + Add(4, 6U, "301", "350", 8 * kFileSize, 0, 101, 150); + Add(4, 7U, "501", "750", 8 * kFileSize, 0, 101, 150); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + + ASSERT_TRUE(compaction); + // Validate that its a delete triggered compaction + ASSERT_EQ(CompactionReason::kFilesMarkedForCompaction, + compaction->compaction_reason()); + ASSERT_TRUE(compaction->start_level() == 0 || + compaction->start_level() == 3); + if (compaction->start_level() == 0) { + // The L0 file was picked.
The next compaction will detect an + // overlap on its input level + input_level_overlap = true; + ASSERT_EQ(3, compaction->output_level()); + ASSERT_EQ(1U, compaction->num_input_files(0)); + ASSERT_EQ(3U, compaction->num_input_files(1)); + } else { + // The level 3 file was picked. The next compaction will pick + // the L0 file and will detect overlap when adding output + // level inputs + output_level_overlap = true; + ASSERT_EQ(4, compaction->output_level()); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->num_input_files(1)); + } + + vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); + // After recomputing the compaction score, only one marked file will remain + random_index = 0; + std::unique_ptr<Compaction> compaction2( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_FALSE(compaction2); + DeleteVersionStorage(); + } +} + +TEST_F(CompactionPickerTest, UniversalMarkedL0NoOverlap) { + const uint64_t kFileSize = 100000; + + ioptions_.compaction_style = kCompactionStyleUniversal; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + // This test covers the case where a delete triggered compaction is + // scheduled and should result in a full compaction + NewVersionStorage(1, kCompactionStyleUniversal); + + // Mark file number 4 for compaction + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 260, 300, 0, true); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 201, 250); + Add(0, 3U, "301", "350", 4 * kFileSize, 0, 101, 150); + Add(0, 6U, "501", "750", 8 * kFileSize, 0, 50, 100); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + + ASSERT_TRUE(compaction); + // Validate that its a delete triggered compaction + ASSERT_EQ(CompactionReason::kFilesMarkedForCompaction, + compaction->compaction_reason()); + ASSERT_EQ(0, compaction->output_level()); + ASSERT_EQ(0, compaction->start_level()); + ASSERT_EQ(4U, compaction->num_input_files(0)); + ASSERT_TRUE(file_map_[4].first->being_compacted); + ASSERT_TRUE(file_map_[5].first->being_compacted); + ASSERT_TRUE(file_map_[3].first->being_compacted); + ASSERT_TRUE(file_map_[6].first->being_compacted); +} + +TEST_F(CompactionPickerTest, UniversalMarkedL0WithOverlap) { + const uint64_t kFileSize = 100000; + + ioptions_.compaction_style = kCompactionStyleUniversal; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + // This test covers the case where a file is being compacted, and a + // delete triggered compaction is then scheduled.
The latter should stop + at the first file being compacted + NewVersionStorage(1, kCompactionStyleUniversal); + + // Mark file number 4 for compaction + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 260, 300, 0, true); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 201, 250); + Add(0, 3U, "301", "350", 4 * kFileSize, 0, 101, 150); + Add(0, 6U, "501", "750", 8 * kFileSize, 0, 50, 100); + UpdateVersionStorageInfo(); + file_map_[3].first->being_compacted = true; + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + + ASSERT_TRUE(compaction); + // Validate that its a delete triggered compaction + ASSERT_EQ(CompactionReason::kFilesMarkedForCompaction, + compaction->compaction_reason()); + ASSERT_EQ(0, compaction->output_level()); + ASSERT_EQ(0, compaction->start_level()); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_TRUE(file_map_[4].first->being_compacted); + ASSERT_TRUE(file_map_[5].first->being_compacted); +} + +TEST_F(CompactionPickerTest, UniversalMarkedL0Overlap2) { + const uint64_t kFileSize = 100000; + + ioptions_.compaction_style = kCompactionStyleUniversal; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + // This test covers the case where a delete triggered compaction is + // scheduled first, followed by a "regular" compaction. The latter + // should fail + NewVersionStorage(1, kCompactionStyleUniversal); + + // Mark file number 4 for compaction + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 260, 300); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 201, 250, 0, true); + Add(0, 3U, "301", "350", 4 * kFileSize, 0, 101, 150); + Add(0, 6U, "501", "750", 8 * kFileSize, 0, 50, 100); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + + ASSERT_TRUE(compaction); + // Validate that its a delete triggered compaction + ASSERT_EQ(CompactionReason::kFilesMarkedForCompaction, + compaction->compaction_reason()); + ASSERT_EQ(0, compaction->output_level()); + ASSERT_EQ(0, compaction->start_level()); + ASSERT_EQ(3U, compaction->num_input_files(0)); + ASSERT_TRUE(file_map_[5].first->being_compacted); + ASSERT_TRUE(file_map_[3].first->being_compacted); + ASSERT_TRUE(file_map_[6].first->being_compacted); + + AddVersionStorage(); + Add(0, 1U, "150", "200", kFileSize, 0, 500, 550); + Add(0, 2U, "201", "250", kFileSize, 0, 401, 450); + UpdateVersionStorageInfo(); + + std::unique_ptr<Compaction> compaction2( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction2); + ASSERT_EQ(3U, compaction->num_input_files(0)); + ASSERT_TRUE(file_map_[1].first->being_compacted); + ASSERT_TRUE(file_map_[2].first->being_compacted); + ASSERT_TRUE(file_map_[4].first->being_compacted); +} + +TEST_F(CompactionPickerTest, UniversalMarkedManualCompaction) { + const uint64_t kFileSize = 100000; + const int kNumLevels = 7; + + // This test makes sure the `files_marked_for_compaction_` is updated after + creating manual compaction.
+ ioptions_.compaction_style = kCompactionStyleUniversal; + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + + NewVersionStorage(kNumLevels, kCompactionStyleUniversal); + + // Add 3 files marked for compaction + Add(0, 3U, "301", "350", 4 * kFileSize, 0, 101, 150, 0, true); + Add(0, 4U, "260", "300", 1 * kFileSize, 0, 260, 300, 0, true); + Add(0, 5U, "240", "290", 2 * kFileSize, 0, 201, 250, 0, true); + UpdateVersionStorageInfo(); + + // All 3 files are marked for compaction + ASSERT_EQ(3U, vstorage_->FilesMarkedForCompaction().size()); + + bool manual_conflict = false; + InternalKey* manual_end = NULL; + std::unique_ptr<Compaction> compaction( + universal_compaction_picker.CompactRange( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + ColumnFamilyData::kCompactAllLevels, 6, CompactRangeOptions(), NULL, + NULL, &manual_end, &manual_conflict, port::kMaxUint64)); + + ASSERT_TRUE(compaction); + + ASSERT_EQ(CompactionReason::kManualCompaction, + compaction->compaction_reason()); + ASSERT_EQ(kNumLevels - 1, compaction->output_level()); + ASSERT_EQ(0, compaction->start_level()); + ASSERT_EQ(3U, compaction->num_input_files(0)); + ASSERT_TRUE(file_map_[3].first->being_compacted); + ASSERT_TRUE(file_map_[4].first->being_compacted); + ASSERT_TRUE(file_map_[5].first->being_compacted); + + // After creating the manual compaction, all files should be cleared from + // `FilesMarkedForCompaction`. So they won't be picked by others. + ASSERT_EQ(0U, vstorage_->FilesMarkedForCompaction().size()); +} + +#endif // ROCKSDB_LITE + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc 2025-05-19 16:14:27.000000000 +0000 @@ -15,9 +15,11 @@ #include #include #include + #include "db/column_family.h" #include "file/filename.h" #include "logging/log_buffer.h" +#include "logging/logging.h" #include "monitoring/statistics.h" #include "test_util/sync_point.h" #include "util/random.h" @@ -31,17 +33,16 @@ // PickCompaction(). class UniversalCompactionBuilder { public: - UniversalCompactionBuilder(const ImmutableCFOptions& ioptions, - const InternalKeyComparator* icmp, - const std::string& cf_name, - const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, - UniversalCompactionPicker* picker, - LogBuffer* log_buffer) + UniversalCompactionBuilder( + const ImmutableOptions& ioptions, const InternalKeyComparator* icmp, + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + UniversalCompactionPicker* picker, LogBuffer* log_buffer) : ioptions_(ioptions), icmp_(icmp), cf_name_(cf_name), mutable_cf_options_(mutable_cf_options), + mutable_db_options_(mutable_db_options), vstorage_(vstorage), picker_(picker), log_buffer_(log_buffer) {} @@ -88,6 +89,14 @@ // Pick Universal compaction to limit space amplification. Compaction* PickCompactionToReduceSizeAmp(); + // Try to pick incremental compaction to reduce space amplification. + // It will return null if it cannot find a fanout within the threshold.
+ // Fanout is defined as + // total size of files to compact at output level + // -------------------------------------------------- + // total size of files to compact at other levels + Compaction* PickIncrementalForReduceSizeAmp(double fanout_threshold); + Compaction* PickDeleteTriggeredCompaction(); // Form a compaction from the sorted run indicated by start_index to the @@ -103,25 +112,27 @@ // because some files are being compacted. Compaction* PickPeriodicCompaction(); - // Used in universal compaction when the enabled_trivial_move + // Used in universal compaction when the allow_trivial_move // option is set. Checks whether there are any overlapping files // in the input. Returns true if the input files are non // overlapping. bool IsInputFilesNonOverlapping(Compaction* c); - const ImmutableCFOptions& ioptions_; + uint64_t GetMaxOverlappingBytes() const; + + const ImmutableOptions& ioptions_; const InternalKeyComparator* icmp_; double score_; std::vector<SortedRun> sorted_runs_; const std::string& cf_name_; const MutableCFOptions& mutable_cf_options_; + const MutableDBOptions& mutable_db_options_; VersionStorageInfo* vstorage_; UniversalCompactionPicker* picker_; LogBuffer* log_buffer_; static std::vector<SortedRun> CalculateSortedRuns( - const VersionStorageInfo& vstorage, const ImmutableCFOptions& ioptions, - const MutableCFOptions& mutable_cf_options); + const VersionStorageInfo& vstorage); // Pick a path ID to place a newly generated file, with its estimated file // size. @@ -158,9 +169,9 @@ const Comparator* ucmp_; }; -typedef std::priority_queue<InputFileInfo, std::vector<InputFileInfo>, - SmallestKeyHeapComparator> - SmallestKeyHeap; +using SmallestKeyHeap = + std::priority_queue<InputFileInfo, std::vector<InputFileInfo>, + SmallestKeyHeapComparator>; // This function creates the heap that is used to find if the files are // overlapping during universal compaction when the allow_trivial_move @@ -278,11 +289,11 @@ Compaction* UniversalCompactionPicker::PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer, - SequenceNumber /* earliest_memtable_seqno */) { + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer, SequenceNumber /* earliest_memtable_seqno */) { UniversalCompactionBuilder builder(ioptions_, icmp_, cf_name, - mutable_cf_options, vstorage, this, - log_buffer); + mutable_cf_options, mutable_db_options, + vstorage, this, log_buffer); return builder.PickCompaction(); } @@ -325,8 +336,7 @@ std::vector<SortedRun> UniversalCompactionBuilder::CalculateSortedRuns( - const VersionStorageInfo& vstorage, const ImmutableCFOptions& /*ioptions*/, - const MutableCFOptions& mutable_cf_options) { + const VersionStorageInfo& vstorage) { std::vector<SortedRun> ret; for (FileMetaData* f : vstorage.LevelFiles(0)) { ret.emplace_back(0, f, f->fd.GetFileSize(), f->compensated_file_size, @@ -336,27 +346,16 @@ uint64_t total_compensated_size = 0U; uint64_t total_size = 0U; bool being_compacted = false; - bool is_first = true; for (FileMetaData* f : vstorage.LevelFiles(level)) { total_compensated_size += f->compensated_file_size; total_size += f->fd.GetFileSize(); - if (mutable_cf_options.compaction_options_universal.allow_trivial_move == - true) { - if (f->being_compacted) { - being_compacted = f->being_compacted; - } - } else { - // Compaction always includes all files for a non-zero level, so for a - // non-zero level, all the files should share the same being_compacted - // value.
- // This assumption is only valid when - // mutable_cf_options.compaction_options_universal.allow_trivial_move - // is false - assert(is_first || f->being_compacted == being_compacted); - } - if (is_first) { + // Size amp, read amp and periodic compactions always include all files + // for a non-zero level. However, a delete triggered compaction and + // a trivial move might pick a subset of files in a sorted run. So + // always check all files in a sorted run and mark the entire run as + // being compacted if one or more files are being compacted. + if (f->being_compacted) { being_compacted = f->being_compacted; - is_first = false; } } if (total_compensated_size > 0) { @@ -372,8 +371,7 @@ Compaction* UniversalCompactionBuilder::PickCompaction() { const int kLevel0 = 0; score_ = vstorage_->CompactionScore(kLevel0); - sorted_runs_ = - CalculateSortedRuns(*vstorage_, ioptions_, mutable_cf_options_); + sorted_runs_ = CalculateSortedRuns(*vstorage_); if (sorted_runs_.size() == 0 || (vstorage_->FilesMarkedForPeriodicCompaction().empty() && @@ -389,7 +387,7 @@ VersionStorageInfo::LevelSummaryStorage tmp; ROCKS_LOG_BUFFER_MAX_SZ( log_buffer_, 3072, - "[%s] Universal: sorted runs files(%" ROCKSDB_PRIszt "): %s\n", + "[%s] Universal: sorted runs: %" ROCKSDB_PRIszt " files: %s\n", cf_name_.c_str(), sorted_runs_.size(), vstorage_->LevelSummary(&tmp)); Compaction* c = nullptr; @@ -475,7 +473,6 @@ // validate that all the chosen files of L0 are non overlapping in time #ifndef NDEBUG - SequenceNumber prev_smallest_seqno = 0U; bool is_first = true; size_t level_index = 0U; @@ -485,7 +482,6 @@ if (is_first) { is_first = false; } - prev_smallest_seqno = f->fd.smallest_seqno; } level_index = 1U; } @@ -497,22 +493,16 @@ &largest_seqno); if (is_first) { is_first = false; - } else if (prev_smallest_seqno > 0) { - // A level is considered as the bottommost level if there are - // no files in higher levels or if files in higher levels do - // not overlap with the files being compacted. Sequence numbers - // of files in bottommost level can be set to 0 to help - // compression. As a result, the following assert may not hold - // if the prev_smallest_seqno is 0. - assert(prev_smallest_seqno > largest_seqno); } - prev_smallest_seqno = smallest_seqno; } } #endif // update statistics - RecordInHistogram(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION, - c->inputs(0)->size()); + size_t num_files = 0; + for (auto& each_level : *c->inputs()) { + num_files += each_level.files.size(); + } + RecordInHistogram(ioptions_.stats, NUM_FILES_IN_SINGLE_COMPACTION, num_files); picker_->RegisterCompaction(c); vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); @@ -737,6 +727,19 @@ cf_name_.c_str(), file_num_buf); } + std::vector<FileMetaData*> grandparents; + // Include grandparents for potential file cutting in incremental + // mode. It is for aligning file cutting boundaries across levels, + // so that subsequent compactions can pick files with aligned + // boundaries. + // Single files are only picked up in incremental mode, so that + // there is no need to cover the full range.
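+ // (Illustrative note, not part of the change itself: the grandparents + // logic below only engages when universal incremental compaction is + // enabled, i.e. options.compaction_options_universal.incremental = true, + // an option introduced by this RocksDB version and off by default.)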
+ if (mutable_cf_options_.compaction_options_universal.incremental && + first_index_after < sorted_runs_.size() && + sorted_runs_[first_index_after].level > 1) { + grandparents = vstorage_->LevelFiles(sorted_runs_[first_index_after].level); + } + CompactionReason compaction_reason; if (max_number_of_files_to_compact == UINT_MAX) { compaction_reason = CompactionReason::kUniversalSizeRatio; @@ -744,21 +747,22 @@ compaction_reason = CompactionReason::kUniversalSortedRunNum; } return new Compaction( - vstorage_, ioptions_, mutable_cf_options_, std::move(inputs), - output_level, + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options_, output_level, kCompactionStyleUniversal), - LLONG_MAX, path_id, + GetMaxOverlappingBytes(), path_id, GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, start_level, 1, enable_compression), - GetCompressionOptions(ioptions_, vstorage_, start_level, + GetCompressionOptions(mutable_cf_options_, vstorage_, start_level, enable_compression), - /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, - score_, false /* deletion_compaction */, compaction_reason); + Temperature::kUnknown, + /* max_subcompactions */ 0, grandparents, /* is manual */ false, score_, + false /* deletion_compaction */, compaction_reason); } // Look at overall size amplification. If size amplification -// exceeeds the configured value, then do a compaction +// exceeds the configured value, then do a compaction // of the candidate files all the way upto the earliest // base file (overrides configured values of file-size ratios, // min_merge_width and max_merge_width). @@ -779,7 +783,7 @@ } // Skip files that are already being compacted - for (size_t loop = 0; loop < sorted_runs_.size() - 1; loop++) { + for (size_t loop = 0; loop + 1 < sorted_runs_.size(); loop++) { sr = &sorted_runs_[loop]; if (!sr->being_compacted) { start_index = loop; // Consider this as the first candidate. @@ -807,9 +811,11 @@ } // keep adding up all the remaining files - for (size_t loop = start_index; loop < sorted_runs_.size() - 1; loop++) { + for (size_t loop = start_index; loop + 1 < sorted_runs_.size(); loop++) { sr = &sorted_runs_[loop]; if (sr->being_compacted) { + // TODO: when incremental compaction is supported, we might want to + // schedule some incremental compactions in parallel if needed. char file_num_buf[kFormatFileNumberBufSize]; sr->Dump(file_num_buf, sizeof(file_num_buf), true); ROCKS_LOG_BUFFER( @@ -843,34 +849,288 @@ " earliest-file-size %" PRIu64, cf_name_.c_str(), candidate_size, earliest_file_size); } + // Since incremental compaction can't include more than the second last + // level, it can introduce a penalty compared to full compaction. We + // hard code the penalty to be 80%. If we end up with a compaction + // fanout higher than 80% of full level compactions, we fall back + // to full level compaction. + // The 80% threshold is arbitrary and can be adjusted or made + // configurable in the future. + // This also prevents the case when compaction falls behind and we + // need to compact more levels for compactions to catch up.
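+ // Worked example (illustrative numbers, not from the change): with + // earliest_file_size = 40 GB and candidate_size = 10 GB, a full compaction + // has a fanout of 4, so fanout_threshold = 40 / 10 * 1.8 = 7.2, and the + // incremental pick below is used only if it finds a window whose own + // fanout (bottom_size / non_bottom_size) stays under 7.2.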
+ if (mutable_cf_options_.compaction_options_universal.incremental) { + double fanout_threshold = static_cast<double>(earliest_file_size) / + static_cast<double>(candidate_size) * 1.8; + Compaction* picked = PickIncrementalForReduceSizeAmp(fanout_threshold); + if (picked != nullptr) { + // As the feature is still experimental, picking incremental compaction + // might fail and we will fall back to compacting the full level. + return picked; + } + } return PickCompactionToOldest(start_index, CompactionReason::kUniversalSizeAmplification); } +Compaction* UniversalCompactionBuilder::PickIncrementalForReduceSizeAmp( + double fanout_threshold) { + // Try to find all potential compactions with total size just over + // options.max_compaction_bytes / 2, and take the one with the lowest + // fanout (defined in the declaration of the function). + // This is done by having a sliding window of the files at the second + // lowest level, and keep expanding while finding overlapping files in the + // last level. Once the total size exceeds the size threshold, calculate + // the fanout value. Then shrink the window from the small side. Keep + // doing this until the end. + // Finally, we try to include upper level files if they fall into + // the range. + // + // Note that it is a similar problem as leveled compaction's + // kMinOverlappingRatio priority, but instead of picking single files + // we expand to a target compaction size. The reason is that in + // leveled compaction, the actual fanout value tends to be high, e.g. 10, so + // even with a single file in the down-merging level, the extra size + // compacted in boundary files is at a lower ratio. But here users + // often have the size of the second last level be 1/4, 1/3 or even + // 1/2 of the bottommost level, so picking a single file in the second + // last level would cause significant waste, which is not desirable. + // + // This algorithm has lots of room to improve to pick more efficient + // compactions. + assert(sorted_runs_.size() >= 2); + int second_last_level = sorted_runs_[sorted_runs_.size() - 2].level; + if (second_last_level == 0) { + // Can't split Level 0. + return nullptr; + } + int output_level = sorted_runs_.back().level; + const std::vector<FileMetaData*>& bottom_files = + vstorage_->LevelFiles(output_level); + const std::vector<FileMetaData*>& files = + vstorage_->LevelFiles(second_last_level); + assert(!bottom_files.empty()); + assert(!files.empty()); + + // std::unordered_map file_to_order; + + int picked_start_idx = 0; + int picked_end_idx = 0; + double picked_fanout = fanout_threshold; + + // Use half target compaction bytes as anchor to stop growing second last + // level files, and reserve growing space for more overlapping bottom level, + // clean cut, files from other levels, etc. + uint64_t comp_thres_size = mutable_cf_options_.max_compaction_bytes / 2; + int start_idx = 0; + int bottom_end_idx = 0; + int bottom_start_idx = 0; + uint64_t non_bottom_size = 0; + uint64_t bottom_size = 0; + bool end_bottom_size_counted = false; + for (int end_idx = 0; end_idx < static_cast<int>(files.size()); end_idx++) { + FileMetaData* end_file = files[end_idx]; + + // Include bottom most level files smaller than the current second + // last level file.
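+ // (Worked illustration with hypothetical key ranges: for second last + // level files [a..c][d..f][g..i] over bottom files [a..e][f..j], the + // window grows to the right, summing non_bottom_size plus the + // overlapping bottom_size; whenever the total crosses + // max_compaction_bytes / 2 the fanout is evaluated against the best one + // seen so far, and the window then shrinks from the left.)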
+ int num_skipped = 0; + while (bottom_end_idx < static_cast<int>(bottom_files.size()) && + icmp_->Compare(bottom_files[bottom_end_idx]->largest, + end_file->smallest) < 0) { + if (!end_bottom_size_counted) { + bottom_size += bottom_files[bottom_end_idx]->fd.file_size; + } + bottom_end_idx++; + end_bottom_size_counted = false; + num_skipped++; + } + + if (num_skipped > 1) { + // At least a file in the bottom most level falls into the file gap. No + // reason to include the file. We cut the range and start a new sliding + // window. + start_idx = end_idx; + } + + if (start_idx == end_idx) { + // new sliding window. + non_bottom_size = 0; + bottom_size = 0; + bottom_start_idx = bottom_end_idx; + end_bottom_size_counted = false; + } + + non_bottom_size += end_file->fd.file_size; + + // Include all overlapping files in bottom level. + while (bottom_end_idx < static_cast<int>(bottom_files.size()) && + icmp_->Compare(bottom_files[bottom_end_idx]->smallest, + end_file->largest) < 0) { + if (!end_bottom_size_counted) { + bottom_size += bottom_files[bottom_end_idx]->fd.file_size; + end_bottom_size_counted = true; + } + if (icmp_->Compare(bottom_files[bottom_end_idx]->largest, + end_file->largest) > 0) { + // next level file crosses the largest key boundary of current file. + break; + } + bottom_end_idx++; + end_bottom_size_counted = false; + } + + if ((non_bottom_size + bottom_size > comp_thres_size || + end_idx == static_cast<int>(files.size()) - 1) && + non_bottom_size > 0) { // Do we allow 0 size files at all? + // If it is a better compaction, remember it in picked* variables. + double fanout = static_cast<double>(bottom_size) / + static_cast<double>(non_bottom_size); + if (fanout < picked_fanout) { + picked_start_idx = start_idx; + picked_end_idx = end_idx; + picked_fanout = fanout; + } + // Shrink from the start side until under comp_thres_size + while (non_bottom_size + bottom_size > comp_thres_size && + start_idx <= end_idx) { + non_bottom_size -= files[start_idx]->fd.file_size; + start_idx++; + if (start_idx < static_cast<int>(files.size())) { + while (bottom_start_idx <= bottom_end_idx && + icmp_->Compare(bottom_files[bottom_start_idx]->largest, + files[start_idx]->smallest) < 0) { + bottom_size -= bottom_files[bottom_start_idx]->fd.file_size; + bottom_start_idx++; + } + } + } + } + } + + if (picked_fanout >= fanout_threshold) { + assert(picked_fanout == fanout_threshold); + return nullptr; + } + + std::vector<CompactionInputFiles> inputs; + CompactionInputFiles bottom_level_inputs; + CompactionInputFiles second_last_level_inputs; + second_last_level_inputs.level = second_last_level; + bottom_level_inputs.level = output_level; + for (int i = picked_start_idx; i <= picked_end_idx; i++) { + if (files[i]->being_compacted) { + return nullptr; + } + second_last_level_inputs.files.push_back(files[i]); + } + assert(!second_last_level_inputs.empty()); + if (!picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &second_last_level_inputs, + /*next_smallest=*/nullptr)) { + return nullptr; + } + // We might be able to avoid this binary search if we save and expand + // from bottom_start_idx and bottom_end_idx, but for now, we use + // SetupOtherInputs() for simplicity. + int parent_index = -1; // Create and use bottom_start_idx? + if (!picker_->SetupOtherInputs(cf_name_, mutable_cf_options_, vstorage_, + &second_last_level_inputs, + &bottom_level_inputs, &parent_index, + /*base_index=*/-1)) { + return nullptr; + } + + // Try to include files in upper levels if they fall into the range.
+ // Since we need to go from the lower level up and this is in the reverse + // order, compared to level order, we first write to a reversed + // data structure and finally copy them to compaction inputs. + InternalKey smallest, largest; + picker_->GetRange(second_last_level_inputs, &smallest, &largest); + std::vector<CompactionInputFiles> inputs_reverse; + for (auto it = ++(++sorted_runs_.rbegin()); it != sorted_runs_.rend(); it++) { + SortedRun& sr = *it; + if (sr.level == 0) { + break; + } + std::vector<FileMetaData*> level_inputs; + vstorage_->GetCleanInputsWithinInterval(sr.level, &smallest, &largest, + &level_inputs); + if (!level_inputs.empty()) { + inputs_reverse.push_back({}); + inputs_reverse.back().level = sr.level; + inputs_reverse.back().files = level_inputs; + picker_->GetRange(inputs_reverse.back(), &smallest, &largest); + } + } + for (auto it = inputs_reverse.rbegin(); it != inputs_reverse.rend(); it++) { + inputs.push_back(*it); + } + + inputs.push_back(second_last_level_inputs); + inputs.push_back(bottom_level_inputs); + + // TODO support multi paths? + uint32_t path_id = 0; + return new Compaction( + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(inputs), output_level, + MaxFileSizeForLevel(mutable_cf_options_, output_level, + kCompactionStyleUniversal), + GetMaxOverlappingBytes(), path_id, + GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, + output_level, 1, true /* enable_compression */), + GetCompressionOptions(mutable_cf_options_, vstorage_, output_level, + true /* enable_compression */), + Temperature::kUnknown, + /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, + score_, false /* deletion_compaction */, + CompactionReason::kUniversalSizeAmplification); +} + // Pick files marked for compaction. Typically, files are marked by // CompactOnDeleteCollector due to the presence of tombstones. Compaction* UniversalCompactionBuilder::PickDeleteTriggeredCompaction() { CompactionInputFiles start_level_inputs; int output_level; std::vector<CompactionInputFiles> inputs; + std::vector<FileMetaData*> grandparents; if (vstorage_->num_levels() == 1) { // This is single level universal. Since we're basically trying to reclaim // space by processing files marked for compaction due to high tombstone // density, let's do the same thing as compaction to reduce size amp which // has the same goals. - bool compact = false; + int start_index = -1; start_level_inputs.level = 0; start_level_inputs.files.clear(); output_level = 0; - for (FileMetaData* f : vstorage_->LevelFiles(0)) { - if (f->marked_for_compaction) { - compact = true; + // Find the first file marked for compaction. Ignore the last file. + for (size_t loop = 0; loop + 1 < sorted_runs_.size(); loop++) { + SortedRun* sr = &sorted_runs_[loop]; + if (sr->being_compacted) { + continue; } - if (compact) { + FileMetaData* f = vstorage_->LevelFiles(0)[loop]; + if (f->marked_for_compaction) { start_level_inputs.files.push_back(f); + start_index = + static_cast<int>(loop); // Consider this as the first candidate.
+ break; + } + } + if (start_index < 0) { + // Either no file marked, or they're already being compacted + return nullptr; + } + + for (size_t loop = start_index + 1; loop < sorted_runs_.size(); loop++) { + SortedRun* sr = &sorted_runs_[loop]; + if (sr->being_compacted) { + break; } + + FileMetaData* f = vstorage_->LevelFiles(0)[loop]; + start_level_inputs.files.push_back(f); } if (start_level_inputs.size() <= 1) { // If only the last file in L0 is marked for compaction, ignore it @@ -939,6 +1199,9 @@ if (picker_->FilesRangeOverlapWithCompaction(inputs, output_level)) { return nullptr; } + + picker_->GetGrandparents(vstorage_, start_level_inputs, + output_level_inputs, &grandparents); } else { inputs.push_back(start_level_inputs); } @@ -952,16 +1215,17 @@ uint32_t path_id = GetPathId(ioptions_, mutable_cf_options_, estimated_total_size); return new Compaction( - vstorage_, ioptions_, mutable_cf_options_, std::move(inputs), - output_level, + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options_, output_level, kCompactionStyleUniversal), - /* max_grandparent_overlap_bytes */ LLONG_MAX, path_id, + /* max_grandparent_overlap_bytes */ GetMaxOverlappingBytes(), path_id, GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, output_level, 1), - GetCompressionOptions(ioptions_, vstorage_, output_level), - /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ true, - score_, false /* deletion_compaction */, + GetCompressionOptions(mutable_cf_options_, vstorage_, output_level), + Temperature::kUnknown, + /* max_subcompactions */ 0, grandparents, /* is manual */ false, score_, + false /* deletion_compaction */, CompactionReason::kFilesMarkedForCompaction); } @@ -1001,6 +1265,9 @@ comp_reason_print_string = "size amp"; } else { assert(false); + comp_reason_print_string = "unknown: "; + comp_reason_print_string.append( + std::to_string(static_cast<int>(compaction_reason))); } char file_num_buf[256]; @@ -1022,15 +1289,16 @@ // compaction_options_universal.compression_size_percent, // because we always compact all the files, so always compress. return new Compaction( - vstorage_, ioptions_, mutable_cf_options_, std::move(inputs), - output_level, + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(inputs), output_level, MaxFileSizeForLevel(mutable_cf_options_, output_level, kCompactionStyleUniversal), - LLONG_MAX, path_id, - GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, start_level, - 1, true /* enable_compression */), - GetCompressionOptions(ioptions_, vstorage_, start_level, + GetMaxOverlappingBytes(), path_id, + GetCompressionType(ioptions_, vstorage_, mutable_cf_options_, + output_level, 1, true /* enable_compression */), + GetCompressionOptions(mutable_cf_options_, vstorage_, output_level, + true /* enable_compression */), + Temperature::kUnknown, /* max_subcompactions */ 0, /* grandparents */ {}, /* is manual */ false, score_, false /* deletion_compaction */, compaction_reason); } @@ -1100,6 +1368,17 @@ return c; } + +uint64_t UniversalCompactionBuilder::GetMaxOverlappingBytes() const { + if (!mutable_cf_options_.compaction_options_universal.incremental) { + return port::kMaxUint64; + } else { + // Try to align the cutting boundary with files at the next level, so that + // an output file doesn't end up at 1/2 of the target size or overlap + // with two full size files at the next level.
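+ // For instance (illustrative, assuming the default target_file_size_base + // of 64 MB), the value below works out to 96 MB, i.e. 1.5x the target + // file size.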
+ return mutable_cf_options_.target_file_size_base / 2 * 3; + } +} } // namespace ROCKSDB_NAMESPACE #endif // !ROCKSDB_LITE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h 2025-05-19 16:14:27.000000000 +0000 @@ -15,12 +15,13 @@ namespace ROCKSDB_NAMESPACE { class UniversalCompactionPicker : public CompactionPicker { public: - UniversalCompactionPicker(const ImmutableCFOptions& ioptions, + UniversalCompactionPicker(const ImmutableOptions& ioptions, const InternalKeyComparator* icmp) : CompactionPicker(ioptions, icmp) {} virtual Compaction* PickCompaction( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, - VersionStorageInfo* vstorage, LogBuffer* log_buffer, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer, SequenceNumber earliest_memtable_seqno = kMaxSequenceNumber) override; virtual int MaxOutputLevel() const override { return NumberLevels() - 1; } diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_service_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_service_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/compaction_service_test.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/compaction_service_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,825 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
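The new compaction_service_test.cc added above exercises the pluggable CompactionService (remote compaction) interface that this RocksDB rebase brings in. As a rough sketch of how a database is wired to such a service (a minimal sketch under stated assumptions: MyCompactionService and the path are hypothetical, while Options::compaction_service itself appears in the tests below):

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    // Open a DB whose compactions are offloaded to a CompactionService.
    // MyCompactionService is assumed to implement
    // ROCKSDB_NAMESPACE::CompactionService, as the test classes below do.
    ROCKSDB_NAMESPACE::Status OpenWithRemoteCompaction(
        std::shared_ptr<ROCKSDB_NAMESPACE::CompactionService> service,
        ROCKSDB_NAMESPACE::DB** db) {
      ROCKSDB_NAMESPACE::Options options;
      options.create_if_missing = true;
      options.compaction_service = service;  // offload compactions
      return ROCKSDB_NAMESPACE::DB::Open(options, "/tmp/remote_compaction_db", db);
    }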
+ +#ifndef ROCKSDB_LITE + +#include "db/db_test_util.h" +#include "port/stack_trace.h" + +namespace ROCKSDB_NAMESPACE { + +class TestCompactionServiceBase { + public: + virtual int GetCompactionNum() = 0; + + void OverrideStartStatus(CompactionServiceJobStatus s) { + is_override_start_status = true; + override_start_status = s; + } + + void OverrideWaitStatus(CompactionServiceJobStatus s) { + is_override_wait_status = true; + override_wait_status = s; + } + + void OverrideWaitResult(std::string str) { + is_override_wait_result = true; + override_wait_result = std::move(str); + } + + void ResetOverride() { + is_override_wait_result = false; + is_override_start_status = false; + is_override_wait_status = false; + } + + virtual ~TestCompactionServiceBase() = default; + + protected: + bool is_override_start_status = false; + CompactionServiceJobStatus override_start_status = + CompactionServiceJobStatus::kFailure; + bool is_override_wait_status = false; + CompactionServiceJobStatus override_wait_status = + CompactionServiceJobStatus::kFailure; + bool is_override_wait_result = false; + std::string override_wait_result; +}; + +class MyTestCompactionServiceLegacy : public CompactionService, + public TestCompactionServiceBase { + public: + MyTestCompactionServiceLegacy(std::string db_path, Options& options, + std::shared_ptr<Statistics>& statistics) + : db_path_(std::move(db_path)), + options_(options), + statistics_(statistics) {} + + static const char* kClassName() { return "MyTestCompactionServiceLegacy"; } + + const char* Name() const override { return kClassName(); } + + CompactionServiceJobStatus Start(const std::string& compaction_service_input, + uint64_t job_id) override { + InstrumentedMutexLock l(&mutex_); + jobs_.emplace(job_id, compaction_service_input); + CompactionServiceJobStatus s = CompactionServiceJobStatus::kSuccess; + if (is_override_start_status) { + return override_start_status; + } + return s; + } + + CompactionServiceJobStatus WaitForComplete( + uint64_t job_id, std::string* compaction_service_result) override { + std::string compaction_input; + { + InstrumentedMutexLock l(&mutex_); + auto i = jobs_.find(job_id); + if (i == jobs_.end()) { + return CompactionServiceJobStatus::kFailure; + } + compaction_input = std::move(i->second); + jobs_.erase(i); + } + + if (is_override_wait_status) { + return override_wait_status; + } + + CompactionServiceOptionsOverride options_override; + options_override.env = options_.env; + options_override.file_checksum_gen_factory = + options_.file_checksum_gen_factory; + options_override.comparator = options_.comparator; + options_override.merge_operator = options_.merge_operator; + options_override.compaction_filter = options_.compaction_filter; + options_override.compaction_filter_factory = + options_.compaction_filter_factory; + options_override.prefix_extractor = options_.prefix_extractor; + options_override.table_factory = options_.table_factory; + options_override.sst_partitioner_factory = options_.sst_partitioner_factory; + options_override.statistics = statistics_; + + Status s = DB::OpenAndCompact( + db_path_, db_path_ + "/" + ROCKSDB_NAMESPACE::ToString(job_id), + compaction_input, compaction_service_result, options_override); + if (is_override_wait_result) { + *compaction_service_result = override_wait_result; + } + compaction_num_.fetch_add(1); + if (s.ok()) { + return CompactionServiceJobStatus::kSuccess; + } else { + return CompactionServiceJobStatus::kFailure; + } + } + + int GetCompactionNum() override { return compaction_num_.load(); } + + 
private: + InstrumentedMutex mutex_; + std::atomic_int compaction_num_{0}; + std::map<uint64_t, std::string> jobs_; + const std::string db_path_; + Options options_; + std::shared_ptr<Statistics> statistics_; +}; + +class MyTestCompactionService : public CompactionService, + public TestCompactionServiceBase { + public: + MyTestCompactionService(std::string db_path, Options& options, + std::shared_ptr<Statistics>& statistics) + : db_path_(std::move(db_path)), + options_(options), + statistics_(statistics), + start_info_("na", "na", "na", 0, Env::TOTAL), + wait_info_("na", "na", "na", 0, Env::TOTAL) {} + + static const char* kClassName() { return "MyTestCompactionService"; } + + const char* Name() const override { return kClassName(); } + + CompactionServiceJobStatus StartV2( + const CompactionServiceJobInfo& info, + const std::string& compaction_service_input) override { + InstrumentedMutexLock l(&mutex_); + start_info_ = info; + assert(info.db_name == db_path_); + jobs_.emplace(info.job_id, compaction_service_input); + CompactionServiceJobStatus s = CompactionServiceJobStatus::kSuccess; + if (is_override_start_status) { + return override_start_status; + } + return s; + } + + CompactionServiceJobStatus WaitForCompleteV2( + const CompactionServiceJobInfo& info, + std::string* compaction_service_result) override { + std::string compaction_input; + assert(info.db_name == db_path_); + { + InstrumentedMutexLock l(&mutex_); + wait_info_ = info; + auto i = jobs_.find(info.job_id); + if (i == jobs_.end()) { + return CompactionServiceJobStatus::kFailure; + } + compaction_input = std::move(i->second); + jobs_.erase(i); + } + + if (is_override_wait_status) { + return override_wait_status; + } + + CompactionServiceOptionsOverride options_override; + options_override.env = options_.env; + options_override.file_checksum_gen_factory = + options_.file_checksum_gen_factory; + options_override.comparator = options_.comparator; + options_override.merge_operator = options_.merge_operator; + options_override.compaction_filter = options_.compaction_filter; + options_override.compaction_filter_factory = + options_.compaction_filter_factory; + options_override.prefix_extractor = options_.prefix_extractor; + options_override.table_factory = options_.table_factory; + options_override.sst_partitioner_factory = options_.sst_partitioner_factory; + options_override.statistics = statistics_; + + Status s = DB::OpenAndCompact( + db_path_, db_path_ + "/" + ROCKSDB_NAMESPACE::ToString(info.job_id), + compaction_input, compaction_service_result, options_override); + if (is_override_wait_result) { + *compaction_service_result = override_wait_result; + } + compaction_num_.fetch_add(1); + if (s.ok()) { + return CompactionServiceJobStatus::kSuccess; + } else { + return CompactionServiceJobStatus::kFailure; + } + } + + int GetCompactionNum() override { return compaction_num_.load(); } + + CompactionServiceJobInfo GetCompactionInfoForStart() { return start_info_; } + CompactionServiceJobInfo GetCompactionInfoForWait() { return wait_info_; } + + private: + InstrumentedMutex mutex_; + std::atomic_int compaction_num_{0}; + std::map<uint64_t, std::string> jobs_; + const std::string db_path_; + Options options_; + std::shared_ptr<Statistics> statistics_; + CompactionServiceJobInfo start_info_; + CompactionServiceJobInfo wait_info_; +}; + +// This is only for listing test classes +enum TestCompactionServiceType { + MyTestCompactionServiceType, + MyTestCompactionServiceLegacyType, +}; + +class CompactionServiceTest + : public DBTestBase, + public testing::WithParamInterface<TestCompactionServiceType> { + public: + explicit 
CompactionServiceTest() + : DBTestBase("compaction_service_test", true) {} + + protected: + void ReopenWithCompactionService(Options* options) { + options->env = env_; + primary_statistics_ = CreateDBStatistics(); + options->statistics = primary_statistics_; + compactor_statistics_ = CreateDBStatistics(); + TestCompactionServiceType cs_type = GetParam(); + switch (cs_type) { + case MyTestCompactionServiceType: + compaction_service_ = std::make_shared<MyTestCompactionService>( + dbname_, *options, compactor_statistics_); + break; + case MyTestCompactionServiceLegacyType: + compaction_service_ = std::make_shared<MyTestCompactionServiceLegacy>( + dbname_, *options, compactor_statistics_); + break; + default: + assert(false); + } + options->compaction_service = compaction_service_; + DestroyAndReopen(*options); + } + + Statistics* GetCompactorStatistics() { return compactor_statistics_.get(); } + + Statistics* GetPrimaryStatistics() { return primary_statistics_.get(); } + + TestCompactionServiceBase* GetCompactionService() { + CompactionService* cs = compaction_service_.get(); + return dynamic_cast<TestCompactionServiceBase*>(cs); + } + + void GenerateTestData() { + // Generate 20 files @ L2 + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 10 + j; + ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + MoveFilesToLevel(2); + + // Generate 10 files @ L1 that overlap with all 20 files @ L2 + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 20 + j * 2; + ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + MoveFilesToLevel(1); + ASSERT_EQ(FilesPerLevel(), "0,10,20"); + } + + void VerifyTestData() { + for (int i = 0; i < 200; i++) { + auto result = Get(Key(i)); + if (i % 2) { + ASSERT_EQ(result, "value" + ToString(i)); + } else { + ASSERT_EQ(result, "value_new" + ToString(i)); + } + } + } + + private: + std::shared_ptr<Statistics> compactor_statistics_; + std::shared_ptr<Statistics> primary_statistics_; + std::shared_ptr<CompactionService> compaction_service_; +}; + +TEST_P(CompactionServiceTest, BasicCompactions) { + Options options = CurrentOptions(); + ReopenWithCompactionService(&options); + + Statistics* primary_statistics = GetPrimaryStatistics(); + Statistics* compactor_statistics = GetCompactorStatistics(); + + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 10 + j; + ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 20 + j * 2; + ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + // verify result + for (int i = 0; i < 200; i++) { + auto result = Get(Key(i)); + if (i % 2) { + ASSERT_EQ(result, "value" + ToString(i)); + } else { + ASSERT_EQ(result, "value_new" + ToString(i)); + } + } + auto my_cs = GetCompactionService(); + ASSERT_GE(my_cs->GetCompactionNum(), 1); + + // make sure the compaction statistics are only recorded on the remote side + ASSERT_GE(compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES), 1); + ASSERT_GE(compactor_statistics->getTickerCount(COMPACT_READ_BYTES), 1); + ASSERT_EQ(primary_statistics->getTickerCount(COMPACT_WRITE_BYTES), 0); + // even with remote compaction, primary host still needs to read SST files to + // `verify_table()`. 
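+ // (Note: the REMOTE_COMPACT_{READ,WRITE}_BYTES tickers asserted on below + // are recorded on the primary and mirror the I/O that the remote + // compactor performs.)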
+ ASSERT_GE(primary_statistics->getTickerCount(COMPACT_READ_BYTES), 1); + // all the compaction write happens on the remote side + ASSERT_EQ(primary_statistics->getTickerCount(REMOTE_COMPACT_WRITE_BYTES), + compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES)); + ASSERT_GE(primary_statistics->getTickerCount(REMOTE_COMPACT_READ_BYTES), 1); + ASSERT_GT(primary_statistics->getTickerCount(COMPACT_READ_BYTES), + primary_statistics->getTickerCount(REMOTE_COMPACT_READ_BYTES)); + // compactor is already the remote side, which doesn't have a remote of its own + ASSERT_EQ(compactor_statistics->getTickerCount(REMOTE_COMPACT_READ_BYTES), 0); + ASSERT_EQ(compactor_statistics->getTickerCount(REMOTE_COMPACT_WRITE_BYTES), + 0); + + // Test failed compaction + SyncPoint::GetInstance()->SetCallBack( + "DBImplSecondary::CompactWithoutInstallation::End", [&](void* status) { + // override job status + auto s = static_cast<Status*>(status); + *s = Status::Aborted("MyTestCompactionService failed to compact!"); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + Status s; + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 20 + j * 2; + s = Put(Key(key_id), "value_new" + ToString(key_id)); + if (s.IsAborted()) { + break; + } + } + if (s.IsAborted()) { + break; + } + s = Flush(); + if (s.IsAborted()) { + break; + } + s = dbfull()->TEST_WaitForCompact(); + if (s.IsAborted()) { + break; + } + } + ASSERT_TRUE(s.IsAborted()); +} + +TEST_P(CompactionServiceTest, ManualCompaction) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + ReopenWithCompactionService(&options); + GenerateTestData(); + + auto my_cs = GetCompactionService(); + + std::string start_str = Key(15); + std::string end_str = Key(45); + Slice start(start_str); + Slice end(end_str); + uint64_t comp_num = my_cs->GetCompactionNum(); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end)); + ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1); + VerifyTestData(); + + start_str = Key(120); + start = start_str; + comp_num = my_cs->GetCompactionNum(); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, nullptr)); + ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1); + VerifyTestData(); + + end_str = Key(92); + end = end_str; + comp_num = my_cs->GetCompactionNum(); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, &end)); + ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1); + VerifyTestData(); + + comp_num = my_cs->GetCompactionNum(); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1); + VerifyTestData(); +} + +TEST_P(CompactionServiceTest, FailedToStart) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + ReopenWithCompactionService(&options); + + GenerateTestData(); + + auto my_cs = GetCompactionService(); + my_cs->OverrideStartStatus(CompactionServiceJobStatus::kFailure); + + std::string start_str = Key(15); + std::string end_str = Key(45); + Slice start(start_str); + Slice end(end_str); + Status s = db_->CompactRange(CompactRangeOptions(), &start, &end); + ASSERT_TRUE(s.IsIncomplete()); +} + +TEST_P(CompactionServiceTest, InvalidResult) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + ReopenWithCompactionService(&options); + + GenerateTestData(); + + auto my_cs = GetCompactionService(); + my_cs->OverrideWaitResult("Invalid Str"); + + std::string start_str = Key(15); + std::string end_str = Key(45); + Slice start(start_str); + 
Slice end(end_str); + Status s = db_->CompactRange(CompactRangeOptions(), &start, &end); + ASSERT_FALSE(s.ok()); +} + +TEST_P(CompactionServiceTest, SubCompaction) { + Options options = CurrentOptions(); + options.max_subcompactions = 10; + options.target_file_size_base = 1 << 10; // 1KB + options.disable_auto_compactions = true; + ReopenWithCompactionService(&options); + + GenerateTestData(); + VerifyTestData(); + + auto my_cs = GetCompactionService(); + int compaction_num_before = my_cs->GetCompactionNum(); + + auto cro = CompactRangeOptions(); + cro.max_subcompactions = 10; + Status s = db_->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(s); + VerifyTestData(); + int compaction_num = my_cs->GetCompactionNum() - compaction_num_before; + // make sure there's sub-compaction by checking the compaction number + ASSERT_GE(compaction_num, 2); +} + +class PartialDeleteCompactionFilter : public CompactionFilter { + public: + CompactionFilter::Decision FilterV2( + int /*level*/, const Slice& key, ValueType /*value_type*/, + const Slice& /*existing_value*/, std::string* /*new_value*/, + std::string* /*skip_until*/) const override { + int i = std::stoi(key.ToString().substr(3)); + if (i > 5 && i <= 105) { + return CompactionFilter::Decision::kRemove; + } + return CompactionFilter::Decision::kKeep; + } + + const char* Name() const override { return "PartialDeleteCompactionFilter"; } +}; + +TEST_P(CompactionServiceTest, CompactionFilter) { + Options options = CurrentOptions(); + std::unique_ptr<CompactionFilter> delete_comp_filter( + new PartialDeleteCompactionFilter()); + options.compaction_filter = delete_comp_filter.get(); + ReopenWithCompactionService(&options); + + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 10 + j; + ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 20 + j * 2; + ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + + // verify result + for (int i = 0; i < 200; i++) { + auto result = Get(Key(i)); + if (i > 5 && i <= 105) { + ASSERT_EQ(result, "NOT_FOUND"); + } else if (i % 2) { + ASSERT_EQ(result, "value" + ToString(i)); + } else { + ASSERT_EQ(result, "value_new" + ToString(i)); + } + } + auto my_cs = GetCompactionService(); + ASSERT_GE(my_cs->GetCompactionNum(), 1); +} + +TEST_P(CompactionServiceTest, Snapshot) { + Options options = CurrentOptions(); + ReopenWithCompactionService(&options); + + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value1")); + const Snapshot* s1 = db_->GetSnapshot(); + ASSERT_OK(Flush()); + + ASSERT_OK(Put(Key(1), "value2")); + ASSERT_OK(Put(Key(3), "value2")); + ASSERT_OK(Flush()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + auto my_cs = GetCompactionService(); + ASSERT_GE(my_cs->GetCompactionNum(), 1); + ASSERT_EQ("value1", Get(Key(1), s1)); + ASSERT_EQ("value2", Get(Key(1))); + db_->ReleaseSnapshot(s1); +} + +TEST_P(CompactionServiceTest, ConcurrentCompaction) { + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 100; + options.max_background_jobs = 20; + ReopenWithCompactionService(&options); + GenerateTestData(); + + ColumnFamilyMetaData meta; + db_->GetColumnFamilyMetaData(&meta); + + std::vector<std::thread> threads; + for (const auto& file : meta.levels[1].files) { + 
threads.push_back(std::thread([&]() { + std::string fname = file.db_path + "/" + file.name; + ASSERT_OK(db_->CompactFiles(CompactionOptions(), {fname}, 2)); + })); + } + + for (auto& thread : threads) { + thread.join(); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + // verify result + for (int i = 0; i < 200; i++) { + auto result = Get(Key(i)); + if (i % 2) { + ASSERT_EQ(result, "value" + ToString(i)); + } else { + ASSERT_EQ(result, "value_new" + ToString(i)); + } + } + auto my_cs = GetCompactionService(); + ASSERT_EQ(my_cs->GetCompactionNum(), 10); + ASSERT_EQ(FilesPerLevel(), "0,0,10"); +} + +TEST_P(CompactionServiceTest, CompactionInfo) { + // only test compaction info for new compaction service interface + if (GetParam() != MyTestCompactionServiceType) { + return; + } + + Options options = CurrentOptions(); + ReopenWithCompactionService(&options); + + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 10 + j; + ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 20 + j * 2; + ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + auto my_cs = + static_cast_with_check<MyTestCompactionService>(GetCompactionService()); + uint64_t comp_num = my_cs->GetCompactionNum(); + ASSERT_GE(comp_num, 1); + + CompactionServiceJobInfo info = my_cs->GetCompactionInfoForStart(); + ASSERT_EQ(dbname_, info.db_name); + std::string db_id, db_session_id; + ASSERT_OK(db_->GetDbIdentity(db_id)); + ASSERT_EQ(db_id, info.db_id); + ASSERT_OK(db_->GetDbSessionId(db_session_id)); + ASSERT_EQ(db_session_id, info.db_session_id); + ASSERT_EQ(Env::LOW, info.priority); + info = my_cs->GetCompactionInfoForWait(); + ASSERT_EQ(dbname_, info.db_name); + ASSERT_EQ(db_id, info.db_id); + ASSERT_EQ(db_session_id, info.db_session_id); + ASSERT_EQ(Env::LOW, info.priority); + + // Test priority USER + ColumnFamilyMetaData meta; + db_->GetColumnFamilyMetaData(&meta); + SstFileMetaData file = meta.levels[1].files[0]; + ASSERT_OK(db_->CompactFiles(CompactionOptions(), + {file.db_path + "/" + file.name}, 2)); + info = my_cs->GetCompactionInfoForStart(); + ASSERT_EQ(Env::USER, info.priority); + info = my_cs->GetCompactionInfoForWait(); + ASSERT_EQ(Env::USER, info.priority); + + // Test priority BOTTOM + env_->SetBackgroundThreads(1, Env::BOTTOM); + options.num_levels = 2; + ReopenWithCompactionService(&options); + my_cs = + static_cast_with_check<MyTestCompactionService>(GetCompactionService()); + + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 10 + j; + ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 20 + j * 2; + ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + info = my_cs->GetCompactionInfoForStart(); + ASSERT_EQ(Env::BOTTOM, info.priority); + info = my_cs->GetCompactionInfoForWait(); + ASSERT_EQ(Env::BOTTOM, info.priority); +} + +TEST_P(CompactionServiceTest, FallbackLocalAuto) { + Options options = CurrentOptions(); + ReopenWithCompactionService(&options); + + auto my_cs = GetCompactionService(); + Statistics* compactor_statistics = GetCompactorStatistics(); + Statistics* primary_statistics = GetPrimaryStatistics(); + uint64_t compactor_write_bytes = + 
compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES); + uint64_t primary_write_bytes = + primary_statistics->getTickerCount(COMPACT_WRITE_BYTES); + + my_cs->OverrideStartStatus(CompactionServiceJobStatus::kUseLocal); + + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 10 + j; + ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + int key_id = i * 20 + j * 2; + ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + // verify result + for (int i = 0; i < 200; i++) { + auto result = Get(Key(i)); + if (i % 2) { + ASSERT_EQ(result, "value" + ToString(i)); + } else { + ASSERT_EQ(result, "value_new" + ToString(i)); + } + } + + ASSERT_EQ(my_cs->GetCompactionNum(), 0); + + // make sure the compaction statistics are only recorded on the local side + ASSERT_EQ(compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES), + compactor_write_bytes); + ASSERT_GT(primary_statistics->getTickerCount(COMPACT_WRITE_BYTES), + primary_write_bytes); + ASSERT_EQ(primary_statistics->getTickerCount(REMOTE_COMPACT_READ_BYTES), 0); + ASSERT_EQ(primary_statistics->getTickerCount(REMOTE_COMPACT_WRITE_BYTES), 0); +} + +TEST_P(CompactionServiceTest, FallbackLocalManual) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + ReopenWithCompactionService(&options); + + GenerateTestData(); + VerifyTestData(); + + auto my_cs = GetCompactionService(); + Statistics* compactor_statistics = GetCompactorStatistics(); + Statistics* primary_statistics = GetPrimaryStatistics(); + uint64_t compactor_write_bytes = + compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES); + uint64_t primary_write_bytes = + primary_statistics->getTickerCount(COMPACT_WRITE_BYTES); + + // re-enable remote compaction + my_cs->ResetOverride(); + std::string start_str = Key(15); + std::string end_str = Key(45); + Slice start(start_str); + Slice end(end_str); + uint64_t comp_num = my_cs->GetCompactionNum(); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end)); + ASSERT_GE(my_cs->GetCompactionNum(), comp_num + 1); + // make sure the compaction statistics are only recorded on the remote side + ASSERT_GT(compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES), + compactor_write_bytes); + ASSERT_EQ(primary_statistics->getTickerCount(REMOTE_COMPACT_WRITE_BYTES), + compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES)); + ASSERT_EQ(primary_statistics->getTickerCount(COMPACT_WRITE_BYTES), + primary_write_bytes); + + // now return kUseLocal again, this time from the WaitForComplete API + my_cs->OverrideWaitStatus(CompactionServiceJobStatus::kUseLocal); + start_str = Key(120); + start = start_str; + comp_num = my_cs->GetCompactionNum(); + compactor_write_bytes = + compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES); + primary_write_bytes = primary_statistics->getTickerCount(COMPACT_WRITE_BYTES); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, nullptr)); + ASSERT_EQ(my_cs->GetCompactionNum(), + comp_num); // no remote compaction is run + // make sure the compaction statistics are only recorded on the local side + ASSERT_EQ(compactor_statistics->getTickerCount(COMPACT_WRITE_BYTES), + compactor_write_bytes); + ASSERT_GT(primary_statistics->getTickerCount(COMPACT_WRITE_BYTES), + primary_write_bytes); + ASSERT_EQ(primary_statistics->getTickerCount(REMOTE_COMPACT_WRITE_BYTES), + 
compactor_write_bytes); + + // verify result after 2 manual compactions + VerifyTestData(); +} + +INSTANTIATE_TEST_CASE_P( + CompactionServiceTest, CompactionServiceTest, + ::testing::Values( + TestCompactionServiceType::MyTestCompactionServiceType, + TestCompactionServiceType::MyTestCompactionServiceLegacyType)); + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + RegisterCustomObjects(argc, argv); + return RUN_ALL_TESTS(); +} + +#else +#include <stdio.h> + +int main(int /*argc*/, char** /*argv*/) { + fprintf(stderr, + "SKIPPED as CompactionService is not supported in ROCKSDB_LITE\n"); + return 0; +} + +#endif // ROCKSDB_LITE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/file_pri.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/file_pri.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/file_pri.h 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/file_pri.h 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,92 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#pragma once +#include <algorithm> + +#include "db/version_edit.h" + +namespace ROCKSDB_NAMESPACE { +// We boost files that are closer to TTL limit. This boosting could be +// through FileMetaData.compensated_file_size but this compensated size +// is widely used as something similar to file size so dramatically boosting +// the value might cause unintended consequences. +// +// This boosting algorithm can go very fancy, but here we use a simple +// formula which can satisfy: +// (1) Different levels are triggered slightly differently to avoid +// too many cascading cases +// (2) Files in the same level get boosted more as the TTL limit gets closer. +// +// Don't do any boosting before half of the TTL has passed. This keeps +// write amplification lower in the common case. All levels should be +// fully boosted by the time the TTL compaction threshold triggers. +// Differentiate the boosting range of each level by 1/2. This makes the +// range for each level increase exponentially. We could do it by +// having them be equal, or go even fancier. We can adjust it after +// we observe the behavior in production. +// The threshold starting boosting: +// +------------------------------------------------------------------ + +// ^ ^ ^ ^ ^ ^ +// Age 0 ... | | second last level threshold +// | | +// | third last level +// | +// fourth last level +// +// The boost is arbitrarily set to 0 when a file reaches boost_age_start in +// age and grows linearly from there. The ratio is arbitrarily set so that +// when the next level starts to boost, the previous level's boosting amount +// is 16. 
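+// Worked example (hypothetical numbers): with ttl = 32 days and 5 non-empty +// levels, boosting applies to L1..L3 only. Per the constructor below, +// all_boost_start_age = 16 days and all_boost_age_range = 31 - 16 = 15 days, +// so L3 (the second last level) starts boosting at about day 23, L2 at about +// day 19 and L1 at about day 17, each adding 1 to the score per +// boost_age_range / 16 of additional age.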
+class FileTtlBooster { + public: + FileTtlBooster(uint64_t current_time, uint64_t ttl, int num_non_empty_levels, + int level) + : current_time_(current_time) { + if (ttl == 0 || level == 0 || level >= num_non_empty_levels - 1) { + enabled_ = false; + boost_age_start_ = 0; + boost_step_ = 1; + } else { + enabled_ = true; + uint64_t all_boost_start_age = ttl / 2; + uint64_t all_boost_age_range = (ttl / 32) * 31 - all_boost_start_age; + uint64_t boost_age_range = + all_boost_age_range >> (num_non_empty_levels - level - 1); + boost_age_start_ = all_boost_start_age + boost_age_range; + const uint64_t kBoostRatio = 16; + // Prevent a 0 value to avoid a divide-by-zero error. + boost_step_ = std::max(boost_age_range / kBoostRatio, uint64_t{1}); + } + } + + uint64_t GetBoostScore(FileMetaData* f) { + if (!enabled_) { + return 1; + } + uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime(); + if (oldest_ancester_time >= current_time_) { + return 1; + } + uint64_t age = current_time_ - oldest_ancester_time; + if (age > boost_age_start_) { + // Use integer just for convenience. + // We could make all file_to_order double if we want. + // Technically this can overflow if users override timing and + // give a very high current time. Ignore the case for simplicity. + // Boosting is an addition to the current value, so +1. This effectively + // makes boosting kick in after the first boost_step_ is reached. + return (age - boost_age_start_) / boost_step_ + 1; + } + return 1; + } + + private: + bool enabled_; + uint64_t current_time_; + uint64_t boost_age_start_; + uint64_t boost_step_; +}; +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/sst_partitioner.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/sst_partitioner.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/compaction/sst_partitioner.cc 1970-01-01 00:00:00.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/compaction/sst_partitioner.cc 2025-05-19 16:14:27.000000000 +0000 @@ -0,0 +1,90 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// + +#include "rocksdb/sst_partitioner.h" + +#include + +#include "rocksdb/utilities/customizable_util.h" +#include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" + +namespace ROCKSDB_NAMESPACE { +static std::unordered_map<std::string, OptionTypeInfo> + sst_fixed_prefix_type_info = { +#ifndef ROCKSDB_LITE + {"length", + {0, OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +#endif // ROCKSDB_LITE +}; + +SstPartitionerFixedPrefixFactory::SstPartitionerFixedPrefixFactory(size_t len) + : len_(len) { + RegisterOptions("Length", &len_, &sst_fixed_prefix_type_info); +} + +PartitionerResult SstPartitionerFixedPrefix::ShouldPartition( + const PartitionerRequest& request) { + Slice last_key_fixed(*request.prev_user_key); + if (last_key_fixed.size() > len_) { + last_key_fixed.size_ = len_; + } + Slice current_key_fixed(*request.current_user_key); + if (current_key_fixed.size() > len_) { + current_key_fixed.size_ = len_; + } + return last_key_fixed.compare(current_key_fixed) != 0 ? 
kRequired + : kNotRequired; +} + +bool SstPartitionerFixedPrefix::CanDoTrivialMove( + const Slice& smallest_user_key, const Slice& largest_user_key) { + return ShouldPartition(PartitionerRequest(smallest_user_key, largest_user_key, + 0)) == kNotRequired; +} + +std::unique_ptr<SstPartitioner> +SstPartitionerFixedPrefixFactory::CreatePartitioner( + const SstPartitioner::Context& /* context */) const { + return std::unique_ptr<SstPartitioner>(new SstPartitionerFixedPrefix(len_)); +} + +std::shared_ptr<SstPartitionerFactory> NewSstPartitionerFixedPrefixFactory( + size_t prefix_len) { + return std::make_shared<SstPartitionerFixedPrefixFactory>(prefix_len); +} + +#ifndef ROCKSDB_LITE +namespace { +static int RegisterSstPartitionerFactories(ObjectLibrary& library, + const std::string& /*arg*/) { + library.AddFactory<SstPartitionerFactory>( + SstPartitionerFixedPrefixFactory::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr<SstPartitionerFactory>* guard, + std::string* /* errmsg */) { + guard->reset(new SstPartitionerFixedPrefixFactory(0)); + return guard->get(); + }); + return 1; +} +} // namespace +#endif // ROCKSDB_LITE + +Status SstPartitionerFactory::CreateFromString( + const ConfigOptions& options, const std::string& value, + std::shared_ptr<SstPartitionerFactory>* result) { +#ifndef ROCKSDB_LITE + static std::once_flag once; + std::call_once(once, [&]() { + RegisterSstPartitionerFactories(*(ObjectLibrary::Default().get()), ""); + }); +#endif // ROCKSDB_LITE + return LoadSharedObject<SstPartitionerFactory>(options, value, nullptr, + result); +} +} // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/comparator_db_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/comparator_db_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/comparator_db_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/comparator_db_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -13,10 +13,10 @@ #include "test_util/testutil.h" #include "util/hash.h" #include "util/kv_map.h" +#include "util/random.h" #include "util/string_util.h" #include "utilities/merge_operators.h" -using std::unique_ptr; namespace ROCKSDB_NAMESPACE { namespace { @@ -317,7 +317,7 @@ INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest, - testing::Values(test::kLatestFormatVersion)); + testing::Values(kLatestFormatVersion)); TEST_P(ComparatorDBTest, Bytewise) { for (int rand_seed = 301; rand_seed < 306; rand_seed++) { @@ -342,12 +342,12 @@ std::vector<std::string> source_prefixes; // Randomly generate 5 prefixes for (int i = 0; i < 5; i++) { - source_prefixes.push_back(test::RandomHumanReadableString(&rnd, 8)); + source_prefixes.push_back(rnd.HumanReadableString(8)); } for (int j = 0; j < 20; j++) { int prefix_index = rnd.Uniform(static_cast<int>(source_prefixes.size())); std::string key = source_prefixes[prefix_index] + - test::RandomHumanReadableString(&rnd, rnd.Uniform(8)); + rnd.HumanReadableString(rnd.Uniform(8)); source_strings.push_back(key); } diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/convenience.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/convenience.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/convenience.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/convenience.cc 2025-05-19 16:14:27.000000000 +0000 @@ -14,7 +14,7 @@ namespace ROCKSDB_NAMESPACE { void CancelAllBackgroundWork(DB* db, bool wait) { - (static_cast_with_check<DBImpl, DB>(db->GetRootDB())) + (static_cast_with_check<DBImpl>(db->GetRootDB())) ->CancelAllBackgroundWork(wait); } @@ -28,7 +28,7 @@ Status DeleteFilesInRanges(DB* db, 
ColumnFamilyHandle* column_family, const RangePtr* ranges, size_t n, bool include_end) { - return (static_cast_with_check<DBImpl, DB>(db->GetRootDB())) + return (static_cast_with_check<DBImpl>(db->GetRootDB())) ->DeleteFilesInRanges(column_family, ranges, n, include_end); } @@ -44,7 +44,7 @@ std::unique_ptr<FSRandomAccessFile> file; uint64_t file_size; InternalKeyComparator internal_comparator(options.comparator); - ImmutableCFOptions ioptions(options); + ImmutableOptions ioptions(options); Status s = ioptions.fs->NewRandomAccessFile(file_path, FileOptions(env_options), @@ -59,9 +59,10 @@ new RandomAccessFileReader(std::move(file), file_path)); const bool kImmortal = true; s = ioptions.table_factory->NewTableReader( - TableReaderOptions(ioptions, options.prefix_extractor.get(), env_options, + TableReaderOptions(ioptions, options.prefix_extractor, env_options, internal_comparator, false /* skip_filters */, - !kImmortal, -1 /* level */), + !kImmortal, false /* force_direct_prefetch */, + -1 /* level */), std::move(file_reader), file_size, &table_reader, false /* prefetch_index_and_filter_in_cache */); if (!s.ok()) { diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/corruption_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/corruption_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/corruption_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/corruption_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -9,37 +9,65 @@ #ifndef ROCKSDB_LITE -#include "rocksdb/db.h" - -#include #include #include + #include + #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" #include "db/log_format.h" #include "db/version_set.h" -#include "env/composite_env_wrapper.h" #include "file/filename.h" +#include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" +#include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/table.h" #include "rocksdb/write_batch.h" #include "table/block_based/block_based_table_builder.h" #include "table/meta_blocks.h" +#include "table/mock_table.h" #include "test_util/testharness.h" #include "test_util/testutil.h" +#include "util/cast_util.h" +#include "util/random.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { -static const int kValueSize = 1000; +static constexpr int kValueSize = 1000; +namespace { +// A wrapper that allows injection of errors. +class ErrorEnv : public EnvWrapper { + public: + bool writable_file_error_; + int num_writable_file_errors_; + explicit ErrorEnv(Env* _target) + : EnvWrapper(_target), + writable_file_error_(false), + num_writable_file_errors_(0) {} + const char* Name() const override { return "ErrorEnv"; } + + virtual Status NewWritableFile(const std::string& fname, + std::unique_ptr<WritableFile>* result, + const EnvOptions& soptions) override { + result->reset(); + if (writable_file_error_) { + ++num_writable_file_errors_; + return Status::IOError(fname, "fake error"); + } + return target()->NewWritableFile(fname, result, soptions); + } +}; +} // namespace class CorruptionTest : public testing::Test { public: - test::ErrorEnv env_; + std::shared_ptr<Env> env_guard_; + ErrorEnv* env_; std::string dbname_; std::shared_ptr<Cache> tiny_cache_; Options options_; @@ -50,10 +78,16 @@ // set it to 0), test SequenceNumberRecovery will fail, likely because of a // bug in recovery code. Keep it 4 for now to make the test passes. 
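// NewLRUCache(capacity, num_shard_bits): a 100-byte cache split into
// 2^4 = 16 shards, small enough that nearly every block is evicted at once.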
tiny_cache_ = NewLRUCache(100, 4); + Env* base_env = Env::Default(); + EXPECT_OK( + test::CreateEnvFromSystem(ConfigOptions(), &base_env, &env_guard_)); + EXPECT_NE(base_env, nullptr); + env_ = new ErrorEnv(base_env); options_.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; - options_.env = &env_; - dbname_ = test::PerThreadDBPath("corruption_test"); - DestroyDB(dbname_, options_); + options_.env = env_; + dbname_ = test::PerThreadDBPath(env_, "corruption_test"); + Status s = DestroyDB(dbname_, options_); + EXPECT_OK(s); db_ = nullptr; options_.create_if_missing = true; @@ -65,8 +99,19 @@ } ~CorruptionTest() override { + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->LoadDependency({}); + SyncPoint::GetInstance()->ClearAllCallBacks(); delete db_; - DestroyDB(dbname_, Options()); + db_ = nullptr; + if (getenv("KEEP_DB")) { + fprintf(stdout, "db is still at %s\n", dbname_.c_str()); + } else { + Options opts; + opts.env = env_->target(); + EXPECT_OK(DestroyDB(dbname_, opts)); + } + delete env_; } void CloseDb() { @@ -81,7 +126,7 @@ if (opt.env == Options().env) { // If env is not overridden, replace it with ErrorEnv. // Otherwise, the test already uses a non-default Env. - opt.env = &env_; + opt.env = env_; } opt.arena_block_size = 4096; BlockBasedTableOptions table_options; @@ -101,22 +146,24 @@ ASSERT_OK(::ROCKSDB_NAMESPACE::RepairDB(dbname_, options_)); } - void Build(int n, int flush_every = 0) { + void Build(int n, int start, int flush_every) { std::string key_space, value_space; WriteBatch batch; for (int i = 0; i < n; i++) { if (flush_every != 0 && i != 0 && i % flush_every == 0) { - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_FlushMemTable(); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); } //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n); - Slice key = Key(i, &key_space); + Slice key = Key(i + start, &key_space); batch.Clear(); - batch.Put(key, Value(i, &value_space)); + ASSERT_OK(batch.Put(key, Value(i + start, &value_space))); ASSERT_OK(db_->Write(WriteOptions(), &batch)); } } + void Build(int n, int flush_every = 0) { Build(n, 0, flush_every); } + void Check(int min_expected, int max_expected) { uint64_t next_expected = 0; uint64_t missed = 0; @@ -131,6 +178,7 @@ // occurred. 
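// ReadOptions(bool verify_checksums, bool fill_cache): verification is
// deliberately off for this scan, which walks over data the test corrupted
// on purpose and only counts how many keys are still readable.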
Iterator* iter = db_->NewIterator(ReadOptions(false, true)); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ASSERT_OK(iter->status()); uint64_t key; Slice in(iter->key()); if (!ConsumeDecimalNumber(&in, &key) || @@ -147,6 +195,7 @@ correct++; } } + iter->status().PermitUncheckedError(); delete iter; fprintf(stderr, @@ -157,47 +206,10 @@ ASSERT_GE(max_expected, correct); } - void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt) { - struct stat sbuf; - if (stat(fname.c_str(), &sbuf) != 0) { - const char* msg = strerror(errno); - FAIL() << fname << ": " << msg; - } - - if (offset < 0) { - // Relative to end of file; make it absolute - if (-offset > sbuf.st_size) { - offset = 0; - } else { - offset = static_cast(sbuf.st_size + offset); - } - } - if (offset > sbuf.st_size) { - offset = static_cast(sbuf.st_size); - } - if (offset + bytes_to_corrupt > sbuf.st_size) { - bytes_to_corrupt = static_cast(sbuf.st_size - offset); - } - - // Do it - std::string contents; - Status s = ReadFileToString(Env::Default(), fname, &contents); - ASSERT_TRUE(s.ok()) << s.ToString(); - for (int i = 0; i < bytes_to_corrupt; i++) { - contents[i + offset] ^= 0x80; - } - s = WriteStringToFile(Env::Default(), contents, fname); - ASSERT_TRUE(s.ok()) << s.ToString(); - Options options; - EnvOptions env_options; - options.file_system.reset(new LegacyFileSystemWrapper(options.env)); - ASSERT_NOK(VerifySstFileChecksum(options, env_options, fname)); - } - void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { // Pick file to corrupt std::vector filenames; - ASSERT_OK(env_.GetChildren(dbname_, &filenames)); + ASSERT_OK(env_->GetChildren(dbname_, &filenames)); uint64_t number; FileType type; std::string fname; @@ -212,7 +224,7 @@ } ASSERT_TRUE(!fname.empty()) << filetype; - CorruptFile(fname, offset, bytes_to_corrupt); + ASSERT_OK(test::CorruptFile(env_, fname, offset, bytes_to_corrupt)); } // corrupts exactly one file at level `level`. if no file found at level, @@ -222,7 +234,8 @@ db_->GetLiveFilesMetaData(&metadata); for (const auto& m : metadata) { if (m.level == level) { - CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt); + ASSERT_OK(test::CorruptFile(env_, dbname_ + "/" + m.name, offset, + bytes_to_corrupt)); return; } } @@ -256,11 +269,11 @@ // preserves the implementation that was in place when all of the // magic values in this file were picked. *storage = std::string(kValueSize, ' '); - return Slice(*storage); } else { Random r(k); - return test::RandomString(&r, kValueSize, storage); + *storage = r.RandomString(kValueSize); } + return Slice(*storage); } }; @@ -277,8 +290,8 @@ // is not available for WAL though. 
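// Below, kLogFile becomes kWalFile: upstream renamed the WAL FileType
// enumerator, and both spellings refer to the write-ahead log.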
CloseDb(); #endif - Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record - Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block + Corrupt(kWalFile, 19, 1); // WriteBatch tag for first record + Corrupt(kWalFile, log::kBlockSize + 1000, 1); // Somewhere in second block ASSERT_TRUE(!TryReopen().ok()); options_.paranoid_checks = false; Reopen(&options_); @@ -288,14 +301,14 @@ } TEST_F(CorruptionTest, RecoverWriteError) { - env_.writable_file_error_ = true; + env_->writable_file_error_ = true; Status s = TryReopen(); ASSERT_TRUE(!s.ok()); } TEST_F(CorruptionTest, NewFileErrorDuringWrite) { // Do enough writing to force minor compaction - env_.writable_file_error_ = true; + env_->writable_file_error_ = true; const int num = static_cast(3 + (Options().write_buffer_size / kValueSize)); std::string value_storage; @@ -303,7 +316,7 @@ bool failed = false; for (int i = 0; i < num; i++) { WriteBatch batch; - batch.Put("a", Value(100, &value_storage)); + ASSERT_OK(batch.Put("a", Value(100, &value_storage))); s = db_->Write(WriteOptions(), &batch); if (!s.ok()) { failed = true; @@ -311,17 +324,17 @@ ASSERT_TRUE(!failed || !s.ok()); } ASSERT_TRUE(!s.ok()); - ASSERT_GE(env_.num_writable_file_errors_, 1); - env_.writable_file_error_ = false; + ASSERT_GE(env_->num_writable_file_errors_, 1); + env_->writable_file_error_ = false; Reopen(); } TEST_F(CorruptionTest, TableFile) { Build(100); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_FlushMemTable(); - dbi->TEST_CompactRange(0, nullptr, nullptr); - dbi->TEST_CompactRange(1, nullptr, nullptr); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr)); + ASSERT_OK(dbi->TEST_CompactRange(1, nullptr, nullptr)); Corrupt(kTableFile, 100, 1); Check(99, 99); @@ -330,7 +343,7 @@ TEST_F(CorruptionTest, VerifyChecksumReadahead) { Options options; - SpecialEnv senv(Env::Default()); + SpecialEnv senv(env_->target()); options.env = &senv; // Disable block cache as we are going to check checksum for // the same file twice and measure number of reads. 
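// Aside: the knob this test exercises is the public ReadOptions::readahead_size,
// which bounds how much VerifyChecksum() reads per I/O while scanning table
// files. A minimal sketch, assuming an already-open DB* db (not part of the diff):
ReadOptions ro;
ro.readahead_size = 1 << 15;  // e.g. 32 KiB per read
Status s = db->VerifyChecksum(ro);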
@@ -341,10 +354,10 @@ Reopen(&options); Build(10000); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_FlushMemTable(); - dbi->TEST_CompactRange(0, nullptr, nullptr); - dbi->TEST_CompactRange(1, nullptr, nullptr); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr)); + ASSERT_OK(dbi->TEST_CompactRange(1, nullptr, nullptr)); senv.count_random_reads_ = true; senv.random_read_counter_.Reset(); @@ -388,14 +401,14 @@ Reopen(&options); // build 2 tables, flush at 5000 Build(10000, 5000); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_FlushMemTable(); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); // corrupt an index block of an entire file Corrupt(kTableFile, -2000, 500); options.paranoid_checks = false; Reopen(&options); - dbi = reinterpret_cast(db_); + dbi = static_cast_with_check(db_); // one full file may be readable, since only one was corrupted // the other file should be fully non-readable, since index was corrupted Check(0, 5000); @@ -435,9 +448,9 @@ TEST_F(CorruptionTest, CorruptedDescriptor) { ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello")); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_FlushMemTable(); - dbi->TEST_CompactRange(0, nullptr, nullptr); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr)); Corrupt(kDescriptorFile, 0, 1000); Status s = TryReopen(); @@ -452,12 +465,13 @@ TEST_F(CorruptionTest, CompactionInputError) { Options options; + options.env = env_; Reopen(&options); Build(10); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_FlushMemTable(); - dbi->TEST_CompactRange(0, nullptr, nullptr); - dbi->TEST_CompactRange(1, nullptr, nullptr); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr)); + ASSERT_OK(dbi->TEST_CompactRange(1, nullptr, nullptr)); ASSERT_EQ(1, Property("rocksdb.num-files-at-level2")); Corrupt(kTableFile, 100, 1); @@ -472,29 +486,30 @@ TEST_F(CorruptionTest, CompactionInputErrorParanoid) { Options options; + options.env = env_; options.paranoid_checks = true; options.write_buffer_size = 131072; options.max_write_buffer_number = 2; Reopen(&options); - DBImpl* dbi = reinterpret_cast(db_); + DBImpl* dbi = static_cast_with_check(db_); // Fill levels >= 1 for (int level = 1; level < dbi->NumberLevels(); level++) { - dbi->Put(WriteOptions(), "", "begin"); - dbi->Put(WriteOptions(), "~", "end"); - dbi->TEST_FlushMemTable(); + ASSERT_OK(dbi->Put(WriteOptions(), "", "begin")); + ASSERT_OK(dbi->Put(WriteOptions(), "~", "end")); + ASSERT_OK(dbi->TEST_FlushMemTable()); for (int comp_level = 0; comp_level < dbi->NumberLevels() - level; ++comp_level) { - dbi->TEST_CompactRange(comp_level, nullptr, nullptr); + ASSERT_OK(dbi->TEST_CompactRange(comp_level, nullptr, nullptr)); } } Reopen(&options); - dbi = reinterpret_cast(db_); + dbi = static_cast_with_check(db_); Build(10); - dbi->TEST_FlushMemTable(); - dbi->TEST_WaitForCompact(); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_WaitForCompact()); ASSERT_EQ(1, Property("rocksdb.num-files-at-level0")); CorruptTableFileAtLevel(0, 100, 1); @@ -518,8 +533,8 @@ TEST_F(CorruptionTest, UnrelatedKeys) { Build(10); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_FlushMemTable(); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); Corrupt(kTableFile, 100, 1); 
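// One flipped byte at offset 100 lands in a data block, so whole-file
// verification must fail even though keys in other blocks stay readable.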
ASSERT_NOK(dbi->VerifyChecksum()); @@ -528,7 +543,7 @@ std::string v; ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); ASSERT_EQ(Value(1000, &tmp2).ToString(), v); - dbi->TEST_FlushMemTable(); + ASSERT_OK(dbi->TEST_FlushMemTable()); ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); ASSERT_EQ(Value(1000, &tmp2).ToString(), v); } @@ -542,37 +557,40 @@ ASSERT_EQ(static_cast(1), metadata.size()); std::string filename = dbname_ + metadata[0].name; - std::unique_ptr file; - ASSERT_OK(options_.env->NewRandomAccessFile(filename, &file, EnvOptions())); - std::unique_ptr file_reader( - new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file), - filename)); + FileOptions file_opts; + const auto& fs = options_.env->GetFileSystem(); + std::unique_ptr file_reader; + ASSERT_OK(RandomAccessFileReader::Create(fs, filename, file_opts, + &file_reader, nullptr)); uint64_t file_size; - ASSERT_OK(options_.env->GetFileSize(filename, &file_size)); + ASSERT_OK( + fs->GetFileSize(filename, file_opts.io_options, &file_size, nullptr)); BlockHandle range_del_handle; - ASSERT_OK(FindMetaBlock( + ASSERT_OK(FindMetaBlockInFile( file_reader.get(), file_size, kBlockBasedTableMagicNumber, - ImmutableCFOptions(options_), kRangeDelBlock, &range_del_handle)); + ImmutableOptions(options_), kRangeDelBlockName, &range_del_handle)); ASSERT_OK(TryReopen()); - CorruptFile(filename, static_cast(range_del_handle.offset()), 1); + ASSERT_OK(test::CorruptFile(env_, filename, + static_cast(range_del_handle.offset()), 1)); ASSERT_TRUE(TryReopen().IsCorruption()); } TEST_F(CorruptionTest, FileSystemStateCorrupted) { for (int iter = 0; iter < 2; ++iter) { Options options; + options.env = env_; options.paranoid_checks = true; options.create_if_missing = true; Reopen(&options); Build(10); ASSERT_OK(db_->Flush(FlushOptions())); - DBImpl* dbi = reinterpret_cast(db_); + DBImpl* dbi = static_cast_with_check(db_); std::vector metadata; dbi->GetLiveFilesMetaData(&metadata); - ASSERT_GT(metadata.size(), size_t(0)); + ASSERT_GT(metadata.size(), 0); std::string filename = dbname_ + metadata[0].name; delete db_; @@ -580,25 +598,326 @@ if (iter == 0) { // corrupt file size std::unique_ptr file; - env_.NewWritableFile(filename, &file, EnvOptions()); - file->Append(Slice("corrupted sst")); + ASSERT_OK(env_->NewWritableFile(filename, &file, EnvOptions())); + ASSERT_OK(file->Append(Slice("corrupted sst"))); file.reset(); Status x = TryReopen(&options); ASSERT_TRUE(x.IsCorruption()); } else { // delete the file - env_.DeleteFile(filename); + ASSERT_OK(env_->DeleteFile(filename)); Status x = TryReopen(&options); - ASSERT_TRUE(x.IsPathNotFound()); + ASSERT_TRUE(x.IsCorruption()); + } + + ASSERT_OK(DestroyDB(dbname_, options_)); + } +} + +static const auto& corruption_modes = { + mock::MockTableFactory::kCorruptNone, mock::MockTableFactory::kCorruptKey, + mock::MockTableFactory::kCorruptValue, + mock::MockTableFactory::kCorruptReorderKey}; + +TEST_F(CorruptionTest, ParanoidFileChecksOnFlush) { + Options options; + options.env = env_; + options.check_flush_compaction_key_order = false; + options.paranoid_file_checks = true; + options.create_if_missing = true; + Status s; + for (const auto& mode : corruption_modes) { + delete db_; + db_ = nullptr; + s = DestroyDB(dbname_, options); + ASSERT_OK(s); + std::shared_ptr mock = + std::make_shared(); + options.table_factory = mock; + mock->SetCorruptionMode(mode); + ASSERT_OK(DB::Open(options, dbname_, &db_)); + assert(db_ != nullptr); // suppress false clang-analyze report + Build(10); + s = 
db_->Flush(FlushOptions()); + if (mode == mock::MockTableFactory::kCorruptNone) { + ASSERT_OK(s); + } else { + ASSERT_NOK(s); + } + } +} + +TEST_F(CorruptionTest, ParanoidFileChecksOnCompact) { + Options options; + options.env = env_; + options.paranoid_file_checks = true; + options.create_if_missing = true; + options.check_flush_compaction_key_order = false; + Status s; + for (const auto& mode : corruption_modes) { + delete db_; + db_ = nullptr; + s = DestroyDB(dbname_, options); + std::shared_ptr mock = + std::make_shared(); + options.table_factory = mock; + ASSERT_OK(DB::Open(options, dbname_, &db_)); + assert(db_ != nullptr); // suppress false clang-analyze report + Build(100, 2); + // ASSERT_OK(db_->Flush(FlushOptions())); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + mock->SetCorruptionMode(mode); + s = dbi->TEST_CompactRange(0, nullptr, nullptr, nullptr, true); + if (mode == mock::MockTableFactory::kCorruptNone) { + ASSERT_OK(s); + } else { + ASSERT_NOK(s); + } + } +} + +TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeFirst) { + Options options; + options.env = env_; + options.check_flush_compaction_key_order = false; + options.paranoid_file_checks = true; + options.create_if_missing = true; + for (bool do_flush : {true, false}) { + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, options)); + ASSERT_OK(DB::Open(options, dbname_, &db_)); + std::string start, end; + assert(db_ != nullptr); // suppress false clang-analyze report + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(3, &start), Key(7, &end))); + auto snap = db_->GetSnapshot(); + ASSERT_NE(snap, nullptr); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(8, &start), Key(9, &end))); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(2, &start), Key(5, &end))); + Build(10); + if (do_flush) { + ASSERT_OK(db_->Flush(FlushOptions())); + } else { + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr, nullptr, true)); } + db_->ReleaseSnapshot(snap); + } +} - DestroyDB(dbname_, options_); +TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRange) { + Options options; + options.env = env_; + options.check_flush_compaction_key_order = false; + options.paranoid_file_checks = true; + options.create_if_missing = true; + for (bool do_flush : {true, false}) { + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, options)); + ASSERT_OK(DB::Open(options, dbname_, &db_)); + assert(db_ != nullptr); // suppress false clang-analyze report + Build(10, 0, 0); + std::string start, end; + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(5, &start), Key(15, &end))); + auto snap = db_->GetSnapshot(); + ASSERT_NE(snap, nullptr); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(8, &start), Key(9, &end))); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(12, &start), Key(17, &end))); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(2, &start), Key(4, &end))); + Build(10, 10, 0); + if (do_flush) { + ASSERT_OK(db_->Flush(FlushOptions())); + } else { + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr, nullptr, true)); + } + db_->ReleaseSnapshot(snap); } } +TEST_F(CorruptionTest, ParanoidFileChecksWithDeleteRangeLast) 
{ + Options options; + options.env = env_; + options.check_flush_compaction_key_order = false; + options.paranoid_file_checks = true; + options.create_if_missing = true; + for (bool do_flush : {true, false}) { + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, options)); + ASSERT_OK(DB::Open(options, dbname_, &db_)); + assert(db_ != nullptr); // suppress false clang-analyze report + std::string start, end; + Build(10); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(3, &start), Key(7, &end))); + auto snap = db_->GetSnapshot(); + ASSERT_NE(snap, nullptr); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(6, &start), Key(8, &end))); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(2, &start), Key(5, &end))); + if (do_flush) { + ASSERT_OK(db_->Flush(FlushOptions())); + } else { + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr, nullptr, true)); + } + db_->ReleaseSnapshot(snap); + } +} + +TEST_F(CorruptionTest, LogCorruptionErrorsInCompactionIterator) { + Options options; + options.env = env_; + options.create_if_missing = true; + options.allow_data_in_errors = true; + auto mode = mock::MockTableFactory::kCorruptKey; + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, options)); + + std::shared_ptr mock = + std::make_shared(); + mock->SetCorruptionMode(mode); + options.table_factory = mock; + + ASSERT_OK(DB::Open(options, dbname_, &db_)); + assert(db_ != nullptr); // suppress false clang-analyze report + Build(100, 2); + + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + Status s = dbi->TEST_CompactRange(0, nullptr, nullptr, nullptr, true); + ASSERT_NOK(s); + ASSERT_TRUE(s.IsCorruption()); +} + +TEST_F(CorruptionTest, CompactionKeyOrderCheck) { + Options options; + options.env = env_; + options.paranoid_file_checks = false; + options.create_if_missing = true; + options.check_flush_compaction_key_order = false; + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, options)); + std::shared_ptr mock = + std::make_shared(); + options.table_factory = mock; + ASSERT_OK(DB::Open(options, dbname_, &db_)); + assert(db_ != nullptr); // suppress false clang-analyze report + mock->SetCorruptionMode(mock::MockTableFactory::kCorruptReorderKey); + Build(100, 2); + DBImpl* dbi = static_cast_with_check(db_); + ASSERT_OK(dbi->TEST_FlushMemTable()); + + mock->SetCorruptionMode(mock::MockTableFactory::kCorruptNone); + ASSERT_OK(db_->SetOptions({{"check_flush_compaction_key_order", "true"}})); + ASSERT_NOK(dbi->TEST_CompactRange(0, nullptr, nullptr, nullptr, true)); +} + +TEST_F(CorruptionTest, FlushKeyOrderCheck) { + Options options; + options.env = env_; + options.paranoid_file_checks = false; + options.create_if_missing = true; + ASSERT_OK(db_->SetOptions({{"check_flush_compaction_key_order", "true"}})); + + ASSERT_OK(db_->Put(WriteOptions(), "foo1", "v1")); + ASSERT_OK(db_->Put(WriteOptions(), "foo2", "v1")); + ASSERT_OK(db_->Put(WriteOptions(), "foo3", "v1")); + ASSERT_OK(db_->Put(WriteOptions(), "foo4", "v1")); + + int cnt = 0; + // Generate some out of order keys from the memtable + SyncPoint::GetInstance()->SetCallBack( + "MemTableIterator::Next:0", [&](void* arg) { + MemTableRep::Iterator* mem_iter = + static_cast(arg); + if (++cnt == 3) { + mem_iter->Prev(); + mem_iter->Prev(); + } + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + Status s 
= static_cast_with_check(db_)->TEST_FlushMemTable(); + ASSERT_NOK(s); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(CorruptionTest, DisableKeyOrderCheck) { + ASSERT_OK(db_->SetOptions({{"check_flush_compaction_key_order", "false"}})); + DBImpl* dbi = static_cast_with_check(db_); + + SyncPoint::GetInstance()->SetCallBack( + "OutputValidator::Add:order_check", + [&](void* /*arg*/) { ASSERT_TRUE(false); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(db_->Put(WriteOptions(), "foo1", "v1")); + ASSERT_OK(db_->Put(WriteOptions(), "foo3", "v1")); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(db_->Put(WriteOptions(), "foo2", "v1")); + ASSERT_OK(db_->Put(WriteOptions(), "foo4", "v1")); + ASSERT_OK(dbi->TEST_FlushMemTable()); + ASSERT_OK(dbi->TEST_CompactRange(0, nullptr, nullptr, nullptr, true)); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(CorruptionTest, VerifyWholeTableChecksum) { + CloseDb(); + Options options; + options.env = env_; + ASSERT_OK(DestroyDB(dbname_, options)); + options.create_if_missing = true; + options.file_checksum_gen_factory = + ROCKSDB_NAMESPACE::GetFileChecksumGenCrc32cFactory(); + Reopen(&options); + + Build(10, 5); + + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); + CloseDb(); + + // Corrupt the first byte of each table file, this must be data block. + Corrupt(kTableFile, 0, 1); + + ASSERT_OK(TryReopen(&options)); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + int count{0}; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::VerifyFullFileChecksum:mismatch", [&](void* arg) { + auto* s = reinterpret_cast(arg); + ASSERT_NE(s, nullptr); + ++count; + ASSERT_NOK(*s); + }); + SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsCorruption()); + ASSERT_EQ(1, count); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); + RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/cuckoo_table_db_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/cuckoo_table_db_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/cuckoo_table_db_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/cuckoo_table_db_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -6,6 +6,7 @@ #ifndef ROCKSDB_LITE #include "db/db_impl/db_impl.h" +#include "db/db_test_util.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "table/cuckoo/cuckoo_table_factory.h" @@ -13,6 +14,7 @@ #include "table/meta_blocks.h" #include "test_util/testharness.h" #include "test_util/testutil.h" +#include "util/cast_util.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { @@ -46,9 +48,7 @@ return options; } - DBImpl* dbfull() { - return reinterpret_cast(db_); - } + DBImpl* dbfull() { return static_cast_with_check(db_); } // The following util methods are copied from plain_table_db_test. 
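// static_cast_with_check<T>() (util/cast_util.h, newly included above) is a
// static_cast that additionally assert()s the dynamic type in RTTI/debug
// builds; the diff swaps it in for the bare reinterpret_cast calls used before.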
void Reopen(Options* options = nullptr) { @@ -64,6 +64,15 @@ ASSERT_OK(DB::Open(opts, dbname_, &db_)); } + void DestroyAndReopen(Options* options) { + assert(options); + ASSERT_OK(db_->Close()); + delete db_; + db_ = nullptr; + ASSERT_OK(DestroyDB(dbname_, *options)); + Reopen(options); + } + Status Put(const Slice& k, const Slice& v) { return db_->Put(WriteOptions(), k, v); } @@ -86,8 +95,8 @@ int NumTableFilesAtLevel(int level) { std::string property; - EXPECT_TRUE(db_->GetProperty( - "rocksdb.num-files-at-level" + NumberToString(level), &property)); + EXPECT_TRUE(db_->GetProperty("rocksdb.num-files-at-level" + ToString(level), + &property)); return atoi(property.c_str()); } @@ -121,10 +130,11 @@ ASSERT_OK(Put("key1", "v1")); ASSERT_OK(Put("key2", "v2")); ASSERT_OK(Put("key3", "v3")); - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); TablePropertiesCollection ptc; - reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); + ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); + VerifySstUniqueIds(ptc); ASSERT_EQ(1U, ptc.size()); ASSERT_EQ(3U, ptc.begin()->second->num_entries); ASSERT_EQ("1", FilesPerLevel()); @@ -138,9 +148,10 @@ ASSERT_OK(Put("key4", "v4")); ASSERT_OK(Put("key5", "v5")); ASSERT_OK(Put("key6", "v6")); - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); - reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); + ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); + VerifySstUniqueIds(ptc); ASSERT_EQ(2U, ptc.size()); auto row = ptc.begin(); ASSERT_EQ(3U, row->second->num_entries); @@ -156,8 +167,9 @@ ASSERT_OK(Delete("key6")); ASSERT_OK(Delete("key5")); ASSERT_OK(Delete("key4")); - dbfull()->TEST_FlushMemTable(); - reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); + ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); + VerifySstUniqueIds(ptc); ASSERT_EQ(3U, ptc.size()); row = ptc.begin(); ASSERT_EQ(3U, row->second->num_entries); @@ -178,10 +190,11 @@ ASSERT_OK(Put("key1", "v1")); ASSERT_OK(Put("key2", "v2")); ASSERT_OK(Put("key1", "v3")); // Duplicate - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); TablePropertiesCollection ptc; - reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc); + ASSERT_OK(reinterpret_cast(dbfull())->GetPropertiesOfAllTables(&ptc)); + VerifySstUniqueIds(ptc); ASSERT_EQ(1U, ptc.size()); ASSERT_EQ(2U, ptc.begin()->second->num_entries); ASSERT_EQ("1", FilesPerLevel()); @@ -206,12 +219,12 @@ TEST_F(CuckooTableDBTest, Uint64Comparator) { Options options = CurrentOptions(); options.comparator = test::Uint64Comparator(); - Reopen(&options); + DestroyAndReopen(&options); ASSERT_OK(Put(Uint64Key(1), "v1")); ASSERT_OK(Put(Uint64Key(2), "v2")); ASSERT_OK(Put(Uint64Key(3), "v3")); - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); ASSERT_EQ("v1", Get(Uint64Key(1))); ASSERT_EQ("v2", Get(Uint64Key(2))); @@ -220,10 +233,10 @@ // Add more keys. ASSERT_OK(Delete(Uint64Key(2))); // Delete. - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); ASSERT_OK(Put(Uint64Key(3), "v0")); // Update. 
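// (The DestroyAndReopen() above, rather than plain Reopen(), is what lets this
// test run under Uint64Comparator: SST files left behind by an earlier run
// under the default bytewise comparator would otherwise fail to open.)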
ASSERT_OK(Put(Uint64Key(4), "v4")); - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); ASSERT_EQ("v1", Get(Uint64Key(1))); ASSERT_EQ("NOT_FOUND", Get(Uint64Key(2))); ASSERT_EQ("v0", Get(Uint64Key(3))); @@ -243,11 +256,11 @@ for (int idx = 0; idx < 28; ++idx) { ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + char(idx)))); } - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ("1", FilesPerLevel()); - dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, - true /* disallow trivial move */); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, + true /* disallow trivial move */)); ASSERT_EQ("0,2", FilesPerLevel()); for (int idx = 0; idx < 28; ++idx) { ASSERT_EQ(std::string(10000, 'a' + char(idx)), Get(Key(idx))); @@ -266,15 +279,15 @@ for (int idx = 0; idx < 11; ++idx) { ASSERT_OK(Put(Key(idx), std::string(10000, 'a'))); } - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ("1", FilesPerLevel()); // Generate one more file in level-0, and should trigger level-0 compaction for (int idx = 0; idx < 11; ++idx) { ASSERT_OK(Put(Key(idx), std::string(10000, 'a' + char(idx)))); } - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_CompactRange(0, nullptr, nullptr); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr)); ASSERT_EQ("0,1", FilesPerLevel()); for (int idx = 0; idx < 11; ++idx) { @@ -295,7 +308,7 @@ ASSERT_OK(Put("key1", "v1")); ASSERT_OK(Put("key2", "v2")); ASSERT_OK(Put("key3", "v3")); - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); // Write some keys using plain table. std::shared_ptr block_based_factory( @@ -311,7 +324,7 @@ Reopen(&options); ASSERT_OK(Put("key4", "v4")); ASSERT_OK(Put("key1", "v5")); - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); // Write some keys using block based table. options.table_factory.reset(NewAdaptiveTableFactory( @@ -320,7 +333,7 @@ Reopen(&options); ASSERT_OK(Put("key5", "v6")); ASSERT_OK(Put("key2", "v7")); - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); ASSERT_EQ("v5", Get("key1")); ASSERT_EQ("v7", Get("key2")); diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_basic_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_basic_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_basic_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_basic_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -6,30 +6,44 @@ // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include + #include "db/db_test_util.h" +#include "options/options_helper.h" #include "port/stack_trace.h" +#include "rocksdb/flush_block_policy.h" +#include "rocksdb/merge_operator.h" #include "rocksdb/perf_context.h" +#include "rocksdb/table.h" #include "rocksdb/utilities/debug.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" -#include "test_util/fault_injection_test_env.h" #if !defined(ROCKSDB_LITE) #include "test_util/sync_point.h" #endif +#include "util/file_checksum_helper.h" +#include "util/random.h" +#include "utilities/fault_injection_env.h" +#include "utilities/merge_operators.h" +#include "utilities/merge_operators/string_append/stringappend.h" namespace ROCKSDB_NAMESPACE { class DBBasicTest : public DBTestBase { public: - DBBasicTest() : DBTestBase("/db_basic_test") {} + DBBasicTest() : DBTestBase("db_basic_test", /*env_do_fsync=*/false) {} }; TEST_F(DBBasicTest, OpenWhenOpen) { Options options = CurrentOptions(); options.env = env_; - ROCKSDB_NAMESPACE::DB* db2 = nullptr; - ROCKSDB_NAMESPACE::Status s = DB::Open(options, dbname_, &db2); - + DB* db2 = nullptr; + Status s = DB::Open(options, dbname_, &db2); + ASSERT_NOK(s) << [db2]() { + delete db2; + return "db2 open: ok"; + }(); ASSERT_EQ(Status::Code::kIOError, s.code()); ASSERT_EQ(Status::SubCode::kNone, s.subcode()); ASSERT_TRUE(strstr(s.getState(), "lock ") != nullptr); @@ -37,6 +51,62 @@ delete db2; } +TEST_F(DBBasicTest, UniqueSession) { + Options options = CurrentOptions(); + std::string sid1, sid2, sid3, sid4; + + ASSERT_OK(db_->GetDbSessionId(sid1)); + Reopen(options); + ASSERT_OK(db_->GetDbSessionId(sid2)); + ASSERT_OK(Put("foo", "v1")); + ASSERT_OK(db_->GetDbSessionId(sid4)); + Reopen(options); + ASSERT_OK(db_->GetDbSessionId(sid3)); + + ASSERT_NE(sid1, sid2); + ASSERT_NE(sid1, sid3); + ASSERT_NE(sid2, sid3); + + ASSERT_EQ(sid2, sid4); + + // Expected compact format for session ids (see notes in implementation) + TestRegex expected("[0-9A-Z]{20}"); + EXPECT_MATCHES_REGEX(sid1, expected); + EXPECT_MATCHES_REGEX(sid2, expected); + EXPECT_MATCHES_REGEX(sid3, expected); + +#ifndef ROCKSDB_LITE + Close(); + ASSERT_OK(ReadOnlyReopen(options)); + ASSERT_OK(db_->GetDbSessionId(sid1)); + // Test uniqueness between readonly open (sid1) and regular open (sid3) + ASSERT_NE(sid1, sid3); + Close(); + ASSERT_OK(ReadOnlyReopen(options)); + ASSERT_OK(db_->GetDbSessionId(sid2)); + ASSERT_EQ("v1", Get("foo")); + ASSERT_OK(db_->GetDbSessionId(sid3)); + + ASSERT_NE(sid1, sid2); + + ASSERT_EQ(sid2, sid3); +#endif // ROCKSDB_LITE + + CreateAndReopenWithCF({"goku"}, options); + ASSERT_OK(db_->GetDbSessionId(sid1)); + ASSERT_OK(Put("bar", "e1")); + ASSERT_OK(db_->GetDbSessionId(sid2)); + ASSERT_EQ("e1", Get("bar")); + ASSERT_OK(db_->GetDbSessionId(sid3)); + ReopenWithColumnFamilies({"default", "goku"}, options); + ASSERT_OK(db_->GetDbSessionId(sid4)); + + ASSERT_EQ(sid1, sid2); + ASSERT_EQ(sid2, sid3); + + ASSERT_NE(sid1, sid4); +} + #ifndef ROCKSDB_LITE TEST_F(DBBasicTest, ReadOnlyDB) { ASSERT_OK(Put("foo", "v1")); @@ -44,29 +114,46 @@ ASSERT_OK(Put("foo", "v3")); Close(); + auto verify_one_iter = [&](Iterator* iter) { + int count = 0; + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ASSERT_OK(iter->status()); + ++count; + } + // Always expect two keys: "foo" and "bar" + ASSERT_EQ(count, 2); + }; + + auto verify_all_iters = [&]() { + Iterator* iter = db_->NewIterator(ReadOptions()); + verify_one_iter(iter); + delete iter; + + std::vector iters; + 
ASSERT_OK(db_->NewIterators(ReadOptions(), + {dbfull()->DefaultColumnFamily()}, &iters)); + ASSERT_EQ(static_cast(1), iters.size()); + verify_one_iter(iters[0]); + delete iters[0]; + }; + auto options = CurrentOptions(); assert(options.env == env_); ASSERT_OK(ReadOnlyReopen(options)); ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v2", Get("bar")); - Iterator* iter = db_->NewIterator(ReadOptions()); - int count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ASSERT_OK(iter->status()); - ++count; - } - ASSERT_EQ(count, 2); - delete iter; + verify_all_iters(); Close(); // Reopen and flush memtable. Reopen(options); - Flush(); + ASSERT_OK(Flush()); Close(); // Now check keys in read only mode. ASSERT_OK(ReadOnlyReopen(options)); ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v2", Get("bar")); + verify_all_iters(); ASSERT_TRUE(db_->SyncWAL().IsNotSupported()); } @@ -81,7 +168,7 @@ assert(options.env == env_); ASSERT_OK(ReadOnlyReopen(options)); std::string db_id1; - db_->GetDbIdentity(db_id1); + ASSERT_OK(db_->GetDbIdentity(db_id1)); ASSERT_EQ("v3", Get("foo")); ASSERT_EQ("v2", Get("bar")); Iterator* iter = db_->NewIterator(ReadOptions()); @@ -96,7 +183,7 @@ // Reopen and flush memtable. Reopen(options); - Flush(); + ASSERT_OK(Flush()); Close(); // Now check keys in read only mode. ASSERT_OK(ReadOnlyReopen(options)); @@ -104,7 +191,7 @@ ASSERT_EQ("v2", Get("bar")); ASSERT_TRUE(db_->SyncWAL().IsNotSupported()); std::string db_id2; - db_->GetDbIdentity(db_id2); + ASSERT_OK(db_->GetDbIdentity(db_id2)); ASSERT_EQ(db_id1, db_id2); } @@ -119,7 +206,7 @@ Reopen(options); // 1 L0 file, use CompactedDB if max_open_files = -1 ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, '1'))); - Flush(); + ASSERT_OK(Flush()); Close(); ASSERT_OK(ReadOnlyReopen(options)); Status s = Put("new", "value"); @@ -137,12 +224,12 @@ Reopen(options); // Add more L0 files ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, '2'))); - Flush(); + ASSERT_OK(Flush()); ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, 'a'))); - Flush(); + ASSERT_OK(Flush()); ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, 'b'))); ASSERT_OK(Put("eee", DummyString(kFileSize / 2, 'e'))); - Flush(); + ASSERT_OK(Flush()); Close(); ASSERT_OK(ReadOnlyReopen(options)); @@ -159,7 +246,7 @@ ASSERT_OK(Put("hhh", DummyString(kFileSize / 2, 'h'))); ASSERT_OK(Put("iii", DummyString(kFileSize / 2, 'i'))); ASSERT_OK(Put("jjj", DummyString(kFileSize / 2, 'j'))); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(3, NumTableFilesAtLevel(1)); Close(); @@ -217,8 +304,8 @@ int i = 0; while (NumTableFilesAtLevel(2, 1) == 0) { ASSERT_OK(Put(1, Key(i++), value)); - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); } options.num_levels = 1; @@ -272,8 +359,8 @@ options.disable_auto_compactions = true; CreateAndReopenWithCF({"pikachu"}, options); - Put(1, "a", Slice()); - SingleDelete(1, "a"); + ASSERT_OK(Put(1, "a", Slice())); + ASSERT_OK(SingleDelete(1, "a")); ASSERT_OK(Flush(1)); ASSERT_EQ("[ ]", AllEntriesFor("a", 1)); @@ -319,12 +406,19 @@ TEST_F(DBBasicTest, CheckLock) { do { - DB* localdb; + DB* localdb = nullptr; Options options = CurrentOptions(); ASSERT_OK(TryReopen(options)); // second open should fail - ASSERT_TRUE(!(DB::Open(options, dbname_, &localdb)).ok()); + Status s = DB::Open(options, dbname_, &localdb); + ASSERT_NOK(s) << [localdb]() { + delete 
localdb; + return "localdb open: ok"; + }(); +#ifdef OS_LINUX + ASSERT_TRUE(s.ToString().find("lock ") != std::string::npos); +#endif // OS_LINUX } while (ChangeCompactOptions()); } @@ -392,7 +486,7 @@ sleeping_task_low.WaitUntilDone(); } -TEST_F(DBBasicTest, FLUSH) { +TEST_F(DBBasicTest, Flush) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); WriteOptions writeOpt = WriteOptions(); @@ -513,29 +607,30 @@ #ifndef ROCKSDB_LITE TEST_F(DBBasicTest, Snapshot) { + env_->SetMockSleep(); anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override)); - Put(0, "foo", "0v1"); - Put(1, "foo", "1v1"); + ASSERT_OK(Put(0, "foo", "0v1")); + ASSERT_OK(Put(1, "foo", "1v1")); const Snapshot* s1 = db_->GetSnapshot(); ASSERT_EQ(1U, GetNumSnapshots()); uint64_t time_snap1 = GetTimeOldestSnapshots(); ASSERT_GT(time_snap1, 0U); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - Put(0, "foo", "0v2"); - Put(1, "foo", "1v2"); + ASSERT_OK(Put(0, "foo", "0v2")); + ASSERT_OK(Put(1, "foo", "1v2")); - env_->addon_time_.fetch_add(1); + env_->MockSleepForSeconds(1); const Snapshot* s2 = db_->GetSnapshot(); ASSERT_EQ(2U, GetNumSnapshots()); ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - Put(0, "foo", "0v3"); - Put(1, "foo", "1v3"); + ASSERT_OK(Put(0, "foo", "0v3")); + ASSERT_OK(Put(1, "foo", "1v3")); { ManagedSnapshot s3(db_); @@ -543,8 +638,8 @@ ASSERT_EQ(time_snap1, GetTimeOldestSnapshots()); ASSERT_EQ(GetSequenceOldestSnapshots(), s1->GetSequenceNumber()); - Put(0, "foo", "0v4"); - Put(1, "foo", "1v4"); + ASSERT_OK(Put(0, "foo", "0v4")); + ASSERT_OK(Put(1, "foo", "1v4")); ASSERT_EQ("0v1", Get(0, "foo", s1)); ASSERT_EQ("1v1", Get(1, "foo", s1)); ASSERT_EQ("0v2", Get(0, "foo", s2)); @@ -584,60 +679,79 @@ #endif // ROCKSDB_LITE -TEST_F(DBBasicTest, CompactBetweenSnapshots) { +class DBBasicMultiConfigs : public DBBasicTest, + public ::testing::WithParamInterface { + public: + DBBasicMultiConfigs() { option_config_ = GetParam(); } + + static std::vector GenerateOptionConfigs() { + std::vector option_configs; + for (int option_config = kDefault; option_config < kEnd; ++option_config) { + if (!ShouldSkipOptions(option_config, kSkipFIFOCompaction)) { + option_configs.push_back(option_config); + } + } + return option_configs; + } +}; + +TEST_P(DBBasicMultiConfigs, CompactBetweenSnapshots) { anon::OptionsOverride options_override; options_override.skip_policy = kSkipNoSnapshot; - do { - Options options = CurrentOptions(options_override); - options.disable_auto_compactions = true; - CreateAndReopenWithCF({"pikachu"}, options); - Random rnd(301); - FillLevels("a", "z", 1); + Options options = CurrentOptions(options_override); + options.disable_auto_compactions = true; + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu"}, options); + Random rnd(301); + FillLevels("a", "z", 1); - Put(1, "foo", "first"); - const Snapshot* snapshot1 = db_->GetSnapshot(); - Put(1, "foo", "second"); - Put(1, "foo", "third"); - Put(1, "foo", "fourth"); - const Snapshot* snapshot2 = db_->GetSnapshot(); - Put(1, "foo", "fifth"); - Put(1, "foo", "sixth"); - - // All entries (including duplicates) exist - // before any compaction or flush is triggered. 
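// AllEntriesFor() dumps every version of the key still stored in the LSM
// tree, newest first, including tombstones (DEL) and overwritten values.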
- ASSERT_EQ(AllEntriesFor("foo", 1), - "[ sixth, fifth, fourth, third, second, first ]"); - ASSERT_EQ("sixth", Get(1, "foo")); - ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); - ASSERT_EQ("first", Get(1, "foo", snapshot1)); + ASSERT_OK(Put(1, "foo", "first")); + const Snapshot* snapshot1 = db_->GetSnapshot(); + ASSERT_OK(Put(1, "foo", "second")); + ASSERT_OK(Put(1, "foo", "third")); + ASSERT_OK(Put(1, "foo", "fourth")); + const Snapshot* snapshot2 = db_->GetSnapshot(); + ASSERT_OK(Put(1, "foo", "fifth")); + ASSERT_OK(Put(1, "foo", "sixth")); + + // All entries (including duplicates) exist + // before any compaction or flush is triggered. + ASSERT_EQ(AllEntriesFor("foo", 1), + "[ sixth, fifth, fourth, third, second, first ]"); + ASSERT_EQ("sixth", Get(1, "foo")); + ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); + ASSERT_EQ("first", Get(1, "foo", snapshot1)); - // After a flush, "second", "third" and "fifth" should - // be removed - ASSERT_OK(Flush(1)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]"); + // After a flush, "second", "third" and "fifth" should + // be removed + ASSERT_OK(Flush(1)); + ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]"); - // after we release the snapshot1, only two values left - db_->ReleaseSnapshot(snapshot1); - FillLevels("a", "z", 1); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); - - // We have only one valid snapshot snapshot2. Since snapshot1 is - // not valid anymore, "first" should be removed by a compaction. - ASSERT_EQ("sixth", Get(1, "foo")); - ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]"); - - // after we release the snapshot2, only one value should be left - db_->ReleaseSnapshot(snapshot2); - FillLevels("a", "z", 1); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); - ASSERT_EQ("sixth", Get(1, "foo")); - ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]"); - } while (ChangeOptions(kSkipFIFOCompaction)); + // after we release the snapshot1, only two values left + db_->ReleaseSnapshot(snapshot1); + FillLevels("a", "z", 1); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, + nullptr)); + + // We have only one valid snapshot snapshot2. Since snapshot1 is + // not valid anymore, "first" should be removed by a compaction. 
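// ReleaseSnapshot() only marks the pinned versions droppable; nothing is
// reclaimed until the CompactRange above rewrites the files that hold them.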
+ ASSERT_EQ("sixth", Get(1, "foo")); + ASSERT_EQ("fourth", Get(1, "foo", snapshot2)); + ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]"); + + // after we release the snapshot2, only one value should be left + db_->ReleaseSnapshot(snapshot2); + FillLevels("a", "z", 1); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, + nullptr)); + ASSERT_EQ("sixth", Get(1, "foo")); + ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]"); } +INSTANTIATE_TEST_CASE_P( + DBBasicMultiConfigs, DBBasicMultiConfigs, + ::testing::ValuesIn(DBBasicMultiConfigs::GenerateOptionConfigs())); + TEST_F(DBBasicTest, DBOpen_Options) { Options options = CurrentOptions(); Close(); @@ -685,18 +799,18 @@ options.disable_auto_compactions = true; CreateAndReopenWithCF({"pikachu"}, options); - Put(1, "foo", "v1"); + ASSERT_OK(Put(1, "foo", "v1")); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v1 ]"); // Write two new keys - Put(1, "a", "begin"); - Put(1, "z", "end"); - Flush(1); + ASSERT_OK(Put(1, "a", "begin")); + ASSERT_OK(Put(1, "z", "end")); + ASSERT_OK(Flush(1)); // Case1: Delete followed by a put - Delete(1, "foo"); - Put(1, "foo", "v2"); + ASSERT_OK(Delete(1, "foo")); + ASSERT_OK(Put(1, "foo", "v2")); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]"); // After the current memtable is flushed, the DEL should @@ -704,66 +818,66 @@ ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]"); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]"); // Case 2: Delete followed by another delete - Delete(1, "foo"); - Delete(1, "foo"); + ASSERT_OK(Delete(1, "foo")); + ASSERT_OK(Delete(1, "foo")); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, DEL, v2 ]"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v2 ]"); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 3: Put followed by a delete - Put(1, "foo", "v3"); - Delete(1, "foo"); + ASSERT_OK(Put(1, "foo", "v3")); + ASSERT_OK(Delete(1, "foo")); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v3 ]"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL ]"); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 4: Put followed by another Put - Put(1, "foo", "v4"); - Put(1, "foo", "v5"); + ASSERT_OK(Put(1, "foo", "v4")); + ASSERT_OK(Put(1, "foo", "v5")); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5, v4 ]"); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]"); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]"); // clear database - Delete(1, "foo"); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(Delete(1, "foo")); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 5: Put followed by snapshot followed by another Put // Both puts should remain. 
- Put(1, "foo", "v6"); + ASSERT_OK(Put(1, "foo", "v6")); const Snapshot* snapshot = db_->GetSnapshot(); - Put(1, "foo", "v7"); + ASSERT_OK(Put(1, "foo", "v7")); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v7, v6 ]"); db_->ReleaseSnapshot(snapshot); // clear database - Delete(1, "foo"); - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(Delete(1, "foo")); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]"); // Case 5: snapshot followed by a put followed by another Put // Only the last put should remain. const Snapshot* snapshot1 = db_->GetSnapshot(); - Put(1, "foo", "v8"); - Put(1, "foo", "v9"); + ASSERT_OK(Put(1, "foo", "v8")); + ASSERT_OK(Put(1, "foo", "v9")); ASSERT_OK(Flush(1)); ASSERT_EQ(AllEntriesFor("foo", 1), "[ v9 ]"); db_->ReleaseSnapshot(snapshot1); @@ -786,7 +900,7 @@ ASSERT_OK(Put(7, "popovich", "popovich")); for (int i = 0; i < 8; ++i) { - Flush(i); + ASSERT_OK(Flush(i)); auto tables = ListTableFiles(env_, dbname_); ASSERT_EQ(tables.size(), i + 1U); } @@ -859,16 +973,24 @@ } while (ChangeCompactOptions()); } -TEST_F(DBBasicTest, ChecksumTest) { +class DBBlockChecksumTest : public DBBasicTest, + public testing::WithParamInterface {}; + +INSTANTIATE_TEST_CASE_P(FormatVersions, DBBlockChecksumTest, + testing::ValuesIn(test::kFooterFormatVersionsToTest)); + +TEST_P(DBBlockChecksumTest, BlockChecksumTest) { BlockBasedTableOptions table_options; + table_options.format_version = GetParam(); Options options = CurrentOptions(); - // change when new checksum type added - int max_checksum = static_cast(kxxHash64); const int kNumPerFile = 2; + const auto algs = GetSupportedChecksums(); + const int algs_size = static_cast(algs.size()); + // generate one table with each type of checksum - for (int i = 0; i <= max_checksum; ++i) { - table_options.checksum = static_cast(i); + for (int i = 0; i < algs_size; ++i) { + table_options.checksum = algs[i]; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); for (int j = 0; j < kNumPerFile; ++j) { @@ -878,15 +1000,20 @@ } // with each valid checksum type setting... 
- for (int i = 0; i <= max_checksum; ++i) { - table_options.checksum = static_cast(i); + for (int i = 0; i < algs_size; ++i) { + table_options.checksum = algs[i]; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); // verify every type of checksum (should be regardless of that setting) - for (int j = 0; j < (max_checksum + 1) * kNumPerFile; ++j) { + for (int j = 0; j < algs_size * kNumPerFile; ++j) { ASSERT_EQ(Key(j), Get(Key(j))); } } + + // Now test invalid checksum type + table_options.checksum = static_cast(123); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_TRUE(TryReopen(options).IsInvalidArgument()); } // On Windows you can have either memory mapped file or a file @@ -919,44 +1046,46 @@ #endif class TestEnv : public EnvWrapper { - public: - explicit TestEnv(Env* base_env) : EnvWrapper(base_env), close_count(0) {} + public: + explicit TestEnv(Env* base_env) : EnvWrapper(base_env), close_count(0) {} + static const char* kClassName() { return "TestEnv"; } + const char* Name() const override { return kClassName(); } - class TestLogger : public Logger { - public: - using Logger::Logv; - explicit TestLogger(TestEnv* env_ptr) : Logger() { env = env_ptr; } - ~TestLogger() override { - if (!closed_) { - CloseHelper(); - } - } - void Logv(const char* /*format*/, va_list /*ap*/) override {} - - protected: - Status CloseImpl() override { return CloseHelper(); } - - private: - Status CloseHelper() { - env->CloseCountInc(); - ; - return Status::IOError(); - } - TestEnv* env; - }; - - void CloseCountInc() { close_count++; } - - int GetCloseCount() { return close_count; } - - Status NewLogger(const std::string& /*fname*/, - std::shared_ptr* result) override { - result->reset(new TestLogger(this)); - return Status::OK(); + class TestLogger : public Logger { + public: + using Logger::Logv; + explicit TestLogger(TestEnv* env_ptr) : Logger() { env = env_ptr; } + ~TestLogger() override { + if (!closed_) { + CloseHelper().PermitUncheckedError(); + } } + void Logv(const char* /*format*/, va_list /*ap*/) override {} + + protected: + Status CloseImpl() override { return CloseHelper(); } private: - int close_count; + Status CloseHelper() { + env->CloseCountInc(); + ; + return Status::IOError(); + } + TestEnv* env; + }; + + void CloseCountInc() { close_count++; } + + int GetCloseCount() { return close_count; } + + Status NewLogger(const std::string& /*fname*/, + std::shared_ptr* result) override { + result->reset(new TestLogger(this)); + return Status::OK(); + } + + private: + int close_count; }; TEST_F(DBBasicTest, DBClose) { @@ -1008,7 +1137,7 @@ Options options = GetDefaultOptions(); options.create_if_missing = true; options.manual_wal_flush = true; - options.write_buffer_size=100; + options.write_buffer_size = 100; options.env = fault_injection_env.get(); Reopen(options); @@ -1018,9 +1147,15 @@ ASSERT_OK(Put("key3", "value3")); fault_injection_env->SetFilesystemActive(false); Status s = dbfull()->Close(); + ASSERT_NE(s, Status::OK()); + // retry should return the same error + s = dbfull()->Close(); + ASSERT_NE(s, Status::OK()); fault_injection_env->SetFilesystemActive(true); + // retry close() is no-op even the system is back. 
Could be improved if + // Close() is retry-able: #9029 + s = dbfull()->Close(); ASSERT_NE(s, Status::OK()); - Destroy(options); } @@ -1048,7 +1183,7 @@ } int get_sv_count = 0; - ROCKSDB_NAMESPACE::DBImpl* db = reinterpret_cast(db_); + ROCKSDB_NAMESPACE::DBImpl* db = static_cast_with_check(db_); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) { if (++get_sv_count == 2) { @@ -1066,7 +1201,7 @@ } if (get_sv_count == 11) { for (int i = 0; i < 8; ++i) { - auto* cfd = reinterpret_cast( + auto* cfd = static_cast_with_check( db->GetColumnFamilyHandle(i)) ->cfd(); ASSERT_EQ(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); @@ -1117,9 +1252,10 @@ ASSERT_EQ(values[2], std::get<2>(cf_kv_vec[1]) + "_2"); for (int cf = 0; cf < 8; ++cf) { - auto* cfd = reinterpret_cast( - reinterpret_cast(db_)->GetColumnFamilyHandle(cf)) - ->cfd(); + auto* cfd = + static_cast_with_check( + static_cast_with_check(db_)->GetColumnFamilyHandle(cf)) + ->cfd(); ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVObsolete); } @@ -1179,9 +1315,10 @@ "cf" + std::to_string(j) + "_val" + std::to_string(retries)); } for (int i = 0; i < 8; ++i) { - auto* cfd = reinterpret_cast( - reinterpret_cast(db_)->GetColumnFamilyHandle(i)) - ->cfd(); + auto* cfd = + static_cast_with_check( + static_cast_with_check(db_)->GetColumnFamilyHandle(i)) + ->cfd(); ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); } } @@ -1198,7 +1335,7 @@ } int get_sv_count = 0; - ROCKSDB_NAMESPACE::DBImpl* db = reinterpret_cast(db_); + ROCKSDB_NAMESPACE::DBImpl* db = static_cast_with_check(db_); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) { if (++get_sv_count == 2) { @@ -1210,7 +1347,7 @@ } if (get_sv_count == 8) { for (int i = 0; i < 8; ++i) { - auto* cfd = reinterpret_cast( + auto* cfd = static_cast_with_check( db->GetColumnFamilyHandle(i)) ->cfd(); ASSERT_TRUE( @@ -1238,13 +1375,36 @@ ASSERT_EQ(values[j], "cf" + std::to_string(j) + "_val"); } for (int i = 0; i < 8; ++i) { - auto* cfd = reinterpret_cast( - reinterpret_cast(db_)->GetColumnFamilyHandle(i)) - ->cfd(); + auto* cfd = + static_cast_with_check( + static_cast_with_check(db_)->GetColumnFamilyHandle(i)) + ->cfd(); ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); } } +TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) { + Options options = CurrentOptions(); + CreateAndReopenWithCF({"one", "two"}, options); + + ASSERT_OK(Put(1, "foo", "bar")); + ASSERT_OK(Put(2, "baz", "xyz")); + ASSERT_OK(Put(1, "abc", "def")); + + // Note: keys for the same CF do not form a consecutive range + std::vector cfs{1, 2, 1}; + std::vector keys{"foo", "baz", "abc"}; + std::vector values; + + values = + MultiGet(cfs, keys, /* snapshot */ nullptr, /* batched */ GetParam()); + + ASSERT_EQ(values.size(), 3); + ASSERT_EQ(values[0], "bar"); + ASSERT_EQ(values[1], "xyz"); + ASSERT_EQ(values[2], "def"); +} + INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam, testing::Bool()); @@ -1289,14 +1449,18 @@ } while (ChangeCompactOptions()); } -TEST_F(DBBasicTest, MultiGetBatchedSimpleSorted) { +TEST_F(DBBasicTest, MultiGetBatchedSortedMultiFile) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); SetPerfLevel(kEnableCount); + // To expand the power of this test, generate > 1 table file and + // mix with memtable ASSERT_OK(Put(1, "k1", "v1")); ASSERT_OK(Put(1, "k2", "v2")); + 
ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "k3", "v3")); ASSERT_OK(Put(1, "k4", "v4")); + ASSERT_OK(Flush(1)); ASSERT_OK(Delete(1, "k4")); ASSERT_OK(Put(1, "k5", "v5")); ASSERT_OK(Delete(1, "no_key")); @@ -1327,7 +1491,58 @@ ASSERT_TRUE(s[5].IsNotFound()); SetPerfLevel(kDisable); - } while (ChangeCompactOptions()); + } while (ChangeOptions()); +} + +TEST_F(DBBasicTest, MultiGetBatchedDuplicateKeys) { + Options opts = CurrentOptions(); + opts.merge_operator = MergeOperators::CreateStringAppendOperator(); + CreateAndReopenWithCF({"pikachu"}, opts); + SetPerfLevel(kEnableCount); + // To expand the power of this test, generate > 1 table file and + // mix with memtable + ASSERT_OK(Merge(1, "k1", "v1")); + ASSERT_OK(Merge(1, "k2", "v2")); + ASSERT_OK(Flush(1)); + MoveFilesToLevel(2, 1); + ASSERT_OK(Merge(1, "k3", "v3")); + ASSERT_OK(Merge(1, "k4", "v4")); + ASSERT_OK(Flush(1)); + MoveFilesToLevel(2, 1); + ASSERT_OK(Merge(1, "k4", "v4_2")); + ASSERT_OK(Merge(1, "k6", "v6")); + ASSERT_OK(Flush(1)); + MoveFilesToLevel(2, 1); + ASSERT_OK(Merge(1, "k7", "v7")); + ASSERT_OK(Merge(1, "k8", "v8")); + ASSERT_OK(Flush(1)); + MoveFilesToLevel(2, 1); + + get_perf_context()->Reset(); + + std::vector keys({"k8", "k8", "k8", "k4", "k4", "k1", "k3"}); + std::vector values(keys.size()); + std::vector cfs(keys.size(), handles_[1]); + std::vector s(keys.size()); + + db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(), + values.data(), s.data(), false); + + ASSERT_EQ(values.size(), keys.size()); + ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v8"); + ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v8"); + ASSERT_EQ(std::string(values[2].data(), values[2].size()), "v8"); + ASSERT_EQ(std::string(values[3].data(), values[3].size()), "v4,v4_2"); + ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v4,v4_2"); + ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v1"); + ASSERT_EQ(std::string(values[6].data(), values[6].size()), "v3"); + ASSERT_EQ(24, (int)get_perf_context()->multiget_read_bytes); + + for (Status& status : s) { + ASSERT_OK(status); + } + + SetPerfLevel(kDisable); } TEST_F(DBBasicTest, MultiGetBatchedMultiLevel) { @@ -1340,12 +1555,12 @@ ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i))); num_keys++; if (num_keys == 8) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } } if (num_keys > 0) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } MoveFilesToLevel(2); @@ -1354,12 +1569,12 @@ ASSERT_OK(Put("key_" + std::to_string(i), "val_l1_" + std::to_string(i))); num_keys++; if (num_keys == 8) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } } if (num_keys > 0) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } MoveFilesToLevel(1); @@ -1368,12 +1583,12 @@ ASSERT_OK(Put("key_" + std::to_string(i), "val_l0_" + std::to_string(i))); num_keys++; if (num_keys == 8) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } } if (num_keys > 0) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } ASSERT_EQ(0, num_keys); @@ -1419,12 +1634,12 @@ ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i))); num_keys++; if (num_keys == 8) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } } if (num_keys > 0) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } MoveFilesToLevel(2); @@ -1433,12 +1648,12 @@ ASSERT_OK(Merge("key_" + std::to_string(i), "val_l1_" + std::to_string(i))); num_keys++; if (num_keys == 8) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } } if (num_keys > 0) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } 
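  // Editor's note (not part of the upstream diff): the repeated
  // Flush() -> ASSERT_OK(Flush()) changes in these hunks make the tests
  // fail fast when a flush returns an error instead of silently dropping
  // the Status. A minimal sketch of the pattern, assuming a gtest-style
  // ASSERT_OK macro and an open DB* db:
  //
  //   Status s = db->Flush(FlushOptions());
  //   ASSERT_OK(s);  // rather than ignoring the returned Status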
MoveFilesToLevel(1); @@ -1447,18 +1662,19 @@ ASSERT_OK(Merge("key_" + std::to_string(i), "val_l0_" + std::to_string(i))); num_keys++; if (num_keys == 8) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } } if (num_keys > 0) { - Flush(); + ASSERT_OK(Flush()); num_keys = 0; } ASSERT_EQ(0, num_keys); for (int i = 0; i < 128; i += 9) { - ASSERT_OK(Merge("key_" + std::to_string(i), "val_mem_" + std::to_string(i))); + ASSERT_OK( + Merge("key_" + std::to_string(i), "val_mem_" + std::to_string(i))); } std::vector keys; @@ -1490,6 +1706,310 @@ } } +TEST_F(DBBasicTest, MultiGetBatchedValueSizeInMemory) { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + SetPerfLevel(kEnableCount); + ASSERT_OK(Put(1, "k1", "v_1")); + ASSERT_OK(Put(1, "k2", "v_2")); + ASSERT_OK(Put(1, "k3", "v_3")); + ASSERT_OK(Put(1, "k4", "v_4")); + ASSERT_OK(Put(1, "k5", "v_5")); + ASSERT_OK(Put(1, "k6", "v_6")); + std::vector keys = {"k1", "k2", "k3", "k4", "k5", "k6"}; + std::vector values(keys.size()); + std::vector s(keys.size()); + std::vector cfs(keys.size(), handles_[1]); + + get_perf_context()->Reset(); + ReadOptions ro; + ro.value_size_soft_limit = 11; + db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), + s.data(), false); + + ASSERT_EQ(values.size(), keys.size()); + for (unsigned int i = 0; i < 4; i++) { + ASSERT_EQ(std::string(values[i].data(), values[i].size()), + "v_" + std::to_string(i + 1)); + } + + for (unsigned int i = 4; i < 6; i++) { + ASSERT_TRUE(s[i].IsAborted()); + } + + ASSERT_EQ(12, (int)get_perf_context()->multiget_read_bytes); + SetPerfLevel(kDisable); +} + +TEST_F(DBBasicTest, MultiGetBatchedValueSize) { + do { + CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); + SetPerfLevel(kEnableCount); + + ASSERT_OK(Put(1, "k6", "v6")); + ASSERT_OK(Put(1, "k7", "v7_")); + ASSERT_OK(Put(1, "k3", "v3_")); + ASSERT_OK(Put(1, "k4", "v4")); + ASSERT_OK(Flush(1)); + ASSERT_OK(Delete(1, "k4")); + ASSERT_OK(Put(1, "k11", "v11")); + ASSERT_OK(Delete(1, "no_key")); + ASSERT_OK(Put(1, "k8", "v8_")); + ASSERT_OK(Put(1, "k13", "v13")); + ASSERT_OK(Put(1, "k14", "v14")); + ASSERT_OK(Put(1, "k15", "v15")); + ASSERT_OK(Put(1, "k16", "v16")); + ASSERT_OK(Put(1, "k17", "v17")); + ASSERT_OK(Flush(1)); + + ASSERT_OK(Put(1, "k1", "v1_")); + ASSERT_OK(Put(1, "k2", "v2_")); + ASSERT_OK(Put(1, "k5", "v5_")); + ASSERT_OK(Put(1, "k9", "v9_")); + ASSERT_OK(Put(1, "k10", "v10")); + ASSERT_OK(Delete(1, "k2")); + ASSERT_OK(Delete(1, "k6")); + + get_perf_context()->Reset(); + + std::vector keys({"k1", "k10", "k11", "k12", "k13", "k14", "k15", + "k16", "k17", "k2", "k3", "k4", "k5", "k6", "k7", + "k8", "k9", "no_key"}); + std::vector values(keys.size()); + std::vector cfs(keys.size(), handles_[1]); + std::vector s(keys.size()); + + ReadOptions ro; + ro.value_size_soft_limit = 20; + db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(), + s.data(), false); + + ASSERT_EQ(values.size(), keys.size()); + + // In memory keys + ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v1_"); + ASSERT_EQ(std::string(values[1].data(), values[1].size()), "v10"); + ASSERT_TRUE(s[9].IsNotFound()); // k2 + ASSERT_EQ(std::string(values[12].data(), values[12].size()), "v5_"); + ASSERT_TRUE(s[13].IsNotFound()); // k6 + ASSERT_EQ(std::string(values[16].data(), values[16].size()), "v9_"); + + // In sst files + ASSERT_EQ(std::string(values[2].data(), values[1].size()), "v11"); + ASSERT_EQ(std::string(values[4].data(), values[4].size()), "v13"); + ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v14"); + 
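  // Editor's note (not part of the upstream diff): these value-size tests
  // exercise ReadOptions::value_size_soft_limit, which caps the cumulative
  // value bytes one MultiGet call may return; once the running total passes
  // the limit, the remaining keys are given Status::Aborted(), as the
  // assertions below check. A hedged caller-side sketch (hypothetical
  // db/cf/keys, not taken from this diff):
  //
  //   ReadOptions ro;
  //   ro.value_size_soft_limit = 20;  // bytes
  //   db->MultiGet(ro, cf, n, keys, values, statuses, false);
  //   for (size_t i = 0; i < n; ++i) {
  //     if (statuses[i].IsAborted()) { /* retry key i in a new call */ }
  //   }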
+ // Remaining aborted after value_size exceeds. + ASSERT_TRUE(s[3].IsAborted()); + ASSERT_TRUE(s[6].IsAborted()); + ASSERT_TRUE(s[7].IsAborted()); + ASSERT_TRUE(s[8].IsAborted()); + ASSERT_TRUE(s[10].IsAborted()); + ASSERT_TRUE(s[11].IsAborted()); + ASSERT_TRUE(s[14].IsAborted()); + ASSERT_TRUE(s[15].IsAborted()); + ASSERT_TRUE(s[17].IsAborted()); + + // 6 kv pairs * 3 bytes per value (i.e. 18) + ASSERT_EQ(21, (int)get_perf_context()->multiget_read_bytes); + SetPerfLevel(kDisable); + } while (ChangeCompactOptions()); +} + +TEST_F(DBBasicTest, MultiGetBatchedValueSizeMultiLevelMerge) { + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.merge_operator = MergeOperators::CreateStringAppendOperator(); + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + Reopen(options); + int num_keys = 0; + + for (int i = 0; i < 64; ++i) { + ASSERT_OK(Put("key_" + std::to_string(i), "val_l2_" + std::to_string(i))); + num_keys++; + if (num_keys == 8) { + ASSERT_OK(Flush()); + num_keys = 0; + } + } + if (num_keys > 0) { + ASSERT_OK(Flush()); + num_keys = 0; + } + MoveFilesToLevel(2); + + for (int i = 0; i < 64; i += 3) { + ASSERT_OK(Merge("key_" + std::to_string(i), "val_l1_" + std::to_string(i))); + num_keys++; + if (num_keys == 8) { + ASSERT_OK(Flush()); + num_keys = 0; + } + } + if (num_keys > 0) { + ASSERT_OK(Flush()); + num_keys = 0; + } + MoveFilesToLevel(1); + + for (int i = 0; i < 64; i += 5) { + ASSERT_OK(Merge("key_" + std::to_string(i), "val_l0_" + std::to_string(i))); + num_keys++; + if (num_keys == 8) { + ASSERT_OK(Flush()); + num_keys = 0; + } + } + if (num_keys > 0) { + ASSERT_OK(Flush()); + num_keys = 0; + } + ASSERT_EQ(0, num_keys); + + for (int i = 0; i < 64; i += 9) { + ASSERT_OK( + Merge("key_" + std::to_string(i), "val_mem_" + std::to_string(i))); + } + + std::vector keys_str; + for (int i = 10; i < 50; ++i) { + keys_str.push_back("key_" + std::to_string(i)); + } + + std::vector keys(keys_str.size()); + for (int i = 0; i < 40; i++) { + keys[i] = Slice(keys_str[i]); + } + + std::vector values(keys_str.size()); + std::vector statuses(keys_str.size()); + ReadOptions read_options; + read_options.verify_checksums = true; + read_options.value_size_soft_limit = 380; + db_->MultiGet(read_options, dbfull()->DefaultColumnFamily(), keys.size(), + keys.data(), values.data(), statuses.data()); + + ASSERT_EQ(values.size(), keys.size()); + + for (unsigned int j = 0; j < 26; ++j) { + int key = j + 10; + std::string value; + value.append("val_l2_" + std::to_string(key)); + if (key % 3 == 0) { + value.append(","); + value.append("val_l1_" + std::to_string(key)); + } + if (key % 5 == 0) { + value.append(","); + value.append("val_l0_" + std::to_string(key)); + } + if (key % 9 == 0) { + value.append(","); + value.append("val_mem_" + std::to_string(key)); + } + ASSERT_EQ(values[j], value); + ASSERT_OK(statuses[j]); + } + + // All remaning keys status is set Status::Abort + for (unsigned int j = 26; j < 40; j++) { + ASSERT_TRUE(statuses[j].IsAborted()); + } +} + +TEST_F(DBBasicTest, MultiGetStats) { + Options options; + options.create_if_missing = true; + options.disable_auto_compactions = true; + options.env = env_; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.block_size = 1; + table_options.index_type = + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + table_options.partition_filters 
= true;
+  table_options.no_block_cache = true;
+  table_options.cache_index_and_filter_blocks = false;
+  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  int total_keys = 2000;
+  std::vector<std::string> keys_str(total_keys);
+  std::vector<Slice> keys(total_keys);
+  static size_t kMultiGetBatchSize = 100;
+  std::vector<PinnableSlice> values(kMultiGetBatchSize);
+  std::vector<Status> s(kMultiGetBatchSize);
+  ReadOptions read_opts;
+
+  Random rnd(309);
+  // Create multiple SST files at multiple levels.
+  for (int i = 0; i < 500; ++i) {
+    keys_str[i] = "k" + std::to_string(i);
+    keys[i] = Slice(keys_str[i]);
+    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
+    if (i % 100 == 0) {
+      ASSERT_OK(Flush(1));
+    }
+  }
+  ASSERT_OK(Flush(1));
+  MoveFilesToLevel(2, 1);
+
+  for (int i = 501; i < 1000; ++i) {
+    keys_str[i] = "k" + std::to_string(i);
+    keys[i] = Slice(keys_str[i]);
+    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
+    if (i % 100 == 0) {
+      ASSERT_OK(Flush(1));
+    }
+  }
+
+  ASSERT_OK(Flush(1));
+  MoveFilesToLevel(2, 1);
+
+  for (int i = 1001; i < total_keys; ++i) {
+    keys_str[i] = "k" + std::to_string(i);
+    keys[i] = Slice(keys_str[i]);
+    ASSERT_OK(Put(1, "k" + std::to_string(i), rnd.RandomString(1000)));
+    if (i % 100 == 0) {
+      ASSERT_OK(Flush(1));
+    }
+  }
+  ASSERT_OK(Flush(1));
+  MoveFilesToLevel(1, 1);
+  Close();
+
+  ReopenWithColumnFamilies({"default", "pikachu"}, options);
+  ASSERT_OK(options.statistics->Reset());
+
+  db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[1250],
+                values.data(), s.data(), false);
+
+  ASSERT_EQ(values.size(), kMultiGetBatchSize);
+  HistogramData hist_data_blocks;
+  HistogramData hist_index_and_filter_blocks;
+  HistogramData hist_sst;
+
+  options.statistics->histogramData(NUM_DATA_BLOCKS_READ_PER_LEVEL,
+                                    &hist_data_blocks);
+  options.statistics->histogramData(NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
+                                    &hist_index_and_filter_blocks);
+  options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst);
+
+  // Maximum number of blocks read from a file system in a level.
+  ASSERT_EQ(hist_data_blocks.max, 32);
+  ASSERT_GT(hist_index_and_filter_blocks.max, 0);
+  // Maximum number of sst files read from file system in a level.
+  ASSERT_EQ(hist_sst.max, 2);
+
+  // Minimum number of blocks read in a level.
+  ASSERT_EQ(hist_data_blocks.min, 4);
+  ASSERT_GT(hist_index_and_filter_blocks.min, 0);
+  // Minimum number of sst files read in a level.
+ ASSERT_EQ(hist_sst.min, 1); +} + // Test class for batched MultiGet with prefix extractor // Param bool - If true, use partitioned filters // If false, use full filter block @@ -1565,11 +2085,11 @@ ASSERT_OK(Put(1, "k2", "v2")); ASSERT_OK(Put(1, "k3", "v3")); ASSERT_OK(Put(1, "k4", "v4")); - Flush(1); + ASSERT_OK(Flush(1)); ASSERT_OK(Put(1, "k5", "v5")); const Snapshot* snap1 = dbfull()->GetSnapshot(); ASSERT_OK(Delete(1, "k4")); - Flush(1); + ASSERT_OK(Flush(1)); const Snapshot* snap2 = dbfull()->GetSnapshot(); get_perf_context()->Reset(); @@ -1674,13 +2194,13 @@ ASSERT_EQ(kNumInserts + kNumDeletes + kNumUpdates, key_versions.size()); // Check non-default column family - for (size_t i = 0; i != kNumInserts - 1; ++i) { + for (size_t i = 0; i + 1 != kNumInserts; ++i) { ASSERT_OK(Put(1, std::to_string(i), "value")); } - for (size_t i = 0; i != kNumUpdates - 1; ++i) { + for (size_t i = 0; i + 1 != kNumUpdates; ++i) { ASSERT_OK(Put(1, std::to_string(i), "value1")); } - for (size_t i = 0; i != kNumDeletes - 1; ++i) { + for (size_t i = 0; i + 1 != kNumDeletes; ++i) { ASSERT_OK(Delete(1, std::to_string(i))); } ASSERT_OK(ROCKSDB_NAMESPACE::GetAllKeyVersions( @@ -1696,19 +2216,19 @@ BlockBasedTableOptions table_options; table_options.pin_l0_filter_and_index_blocks_in_cache = true; table_options.block_size = 16 * 1024; - assert(table_options.block_size > - BlockBasedTable::kMultiGetReadStackBufSize); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + ASSERT_TRUE(table_options.block_size > + BlockBasedTable::kMultiGetReadStackBufSize); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); std::string zero_str(128, '\0'); for (int i = 0; i < 100; ++i) { // Make the value compressible. A purely random string doesn't compress // and the resultant data block will not be compressed - std::string value(RandomString(&rnd, 128) + zero_str); + std::string value(rnd.RandomString(128) + zero_str); assert(Put(Key(i), value) == Status::OK()); } - Flush(); + ASSERT_OK(Flush()); std::vector key_data(10); std::vector keys; @@ -1729,15 +2249,451 @@ keys.data(), values.data(), statuses.data(), true); } -class DBBasicTestWithParallelIO - : public DBTestBase, - public testing::WithParamInterface> { +TEST_F(DBBasicTest, IncrementalRecoveryNoCorrupt) { + Options options = CurrentOptions(); + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu", "eevee"}, options); + size_t num_cfs = handles_.size(); + ASSERT_EQ(3, num_cfs); + WriteOptions write_opts; + write_opts.disableWAL = true; + for (size_t cf = 0; cf != num_cfs; ++cf) { + for (size_t i = 0; i != 10000; ++i) { + std::string key_str = Key(static_cast(i)); + std::string value_str = std::to_string(cf) + "_" + std::to_string(i); + + ASSERT_OK(Put(static_cast(cf), key_str, value_str)); + if (0 == (i % 1000)) { + ASSERT_OK(Flush(static_cast(cf))); + } + } + } + for (size_t cf = 0; cf != num_cfs; ++cf) { + ASSERT_OK(Flush(static_cast(cf))); + } + Close(); + options.best_efforts_recovery = true; + ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu", "eevee"}, + options); + num_cfs = handles_.size(); + ASSERT_EQ(3, num_cfs); + for (size_t cf = 0; cf != num_cfs; ++cf) { + for (int i = 0; i != 10000; ++i) { + std::string key_str = Key(static_cast(i)); + std::string expected_value_str = + std::to_string(cf) + "_" + std::to_string(i); + ASSERT_EQ(expected_value_str, Get(static_cast(cf), key_str)); + } + } +} + +TEST_F(DBBasicTest, BestEffortsRecoveryWithVersionBuildingFailure) { + Options options = 
CurrentOptions(); + DestroyAndReopen(options); + ASSERT_OK(Put("foo", "value")); + ASSERT_OK(Flush()); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->SetCallBack( + "VersionBuilder::CheckConsistencyBeforeReturn", [&](void* arg) { + ASSERT_NE(nullptr, arg); + *(reinterpret_cast(arg)) = + Status::Corruption("Inject corruption"); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + options.best_efforts_recovery = true; + Status s = TryReopen(options); + ASSERT_TRUE(s.IsCorruption()); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +#ifndef ROCKSDB_LITE +namespace { +class TableFileListener : public EventListener { public: - DBBasicTestWithParallelIO() : DBTestBase("/db_basic_test_with_parallel_io") { - bool compressed_cache = std::get<0>(GetParam()); - bool uncompressed_cache = std::get<1>(GetParam()); - compression_enabled_ = std::get<2>(GetParam()); - fill_cache_ = std::get<3>(GetParam()); + void OnTableFileCreated(const TableFileCreationInfo& info) override { + InstrumentedMutexLock lock(&mutex_); + cf_to_paths_[info.cf_name].push_back(info.file_path); + } + std::vector& GetFiles(const std::string& cf_name) { + InstrumentedMutexLock lock(&mutex_); + return cf_to_paths_[cf_name]; + } + + private: + InstrumentedMutex mutex_; + std::unordered_map> cf_to_paths_; +}; +} // namespace + +TEST_F(DBBasicTest, LastSstFileNotInManifest) { + // If the last sst file is not tracked in MANIFEST, + // or the VersionEdit for the last sst file is not synced, + // on recovery, the last sst file should be deleted, + // and new sst files shouldn't reuse its file number. + Options options = CurrentOptions(); + DestroyAndReopen(options); + Close(); + + // Manually add a sst file. + constexpr uint64_t kSstFileNumber = 100; + const std::string kSstFile = MakeTableFileName(dbname_, kSstFileNumber); + ASSERT_OK(WriteStringToFile(env_, /* data = */ "bad sst file content", + /* fname = */ kSstFile, + /* should_sync = */ true)); + ASSERT_OK(env_->FileExists(kSstFile)); + + TableFileListener* listener = new TableFileListener(); + options.listeners.emplace_back(listener); + Reopen(options); + // kSstFile should already be deleted. + ASSERT_TRUE(env_->FileExists(kSstFile).IsNotFound()); + + ASSERT_OK(Put("k", "v")); + ASSERT_OK(Flush()); + // New sst file should have file number > kSstFileNumber. + std::vector& files = + listener->GetFiles(kDefaultColumnFamilyName); + ASSERT_EQ(files.size(), 1); + const std::string fname = files[0].erase(0, (dbname_ + "/").size()); + uint64_t number = 0; + FileType type = kTableFile; + ASSERT_TRUE(ParseFileName(fname, &number, &type)); + ASSERT_EQ(type, kTableFile); + ASSERT_GT(number, kSstFileNumber); +} + +TEST_F(DBBasicTest, RecoverWithMissingFiles) { + Options options = CurrentOptions(); + DestroyAndReopen(options); + TableFileListener* listener = new TableFileListener(); + // Disable auto compaction to simplify SST file name tracking. 
+  options.disable_auto_compactions = true;
+  options.listeners.emplace_back(listener);
+  CreateAndReopenWithCF({"pikachu", "eevee"}, options);
+  std::vector<std::string> all_cf_names = {kDefaultColumnFamilyName, "pikachu",
+                                           "eevee"};
+  size_t num_cfs = handles_.size();
+  ASSERT_EQ(3, num_cfs);
+  for (size_t cf = 0; cf != num_cfs; ++cf) {
+    ASSERT_OK(Put(static_cast<int>(cf), "a", "0_value"));
+    ASSERT_OK(Flush(static_cast<int>(cf)));
+    ASSERT_OK(Put(static_cast<int>(cf), "b", "0_value"));
+    ASSERT_OK(Flush(static_cast<int>(cf)));
+    ASSERT_OK(Put(static_cast<int>(cf), "c", "0_value"));
+    ASSERT_OK(Flush(static_cast<int>(cf)));
+  }
+
+  // Delete and corrupt files
+  for (size_t i = 0; i < all_cf_names.size(); ++i) {
+    std::vector<std::string>& files = listener->GetFiles(all_cf_names[i]);
+    ASSERT_EQ(3, files.size());
+    std::string corrupted_data;
+    ASSERT_OK(ReadFileToString(env_, files[files.size() - 1], &corrupted_data));
+    ASSERT_OK(WriteStringToFile(
+        env_, corrupted_data.substr(0, corrupted_data.size() - 2),
+        files[files.size() - 1], /*should_sync=*/true));
+    for (int j = static_cast<int>(files.size() - 2); j >= static_cast<int>(i);
+         --j) {
+      ASSERT_OK(env_->DeleteFile(files[j]));
+    }
+  }
+  options.best_efforts_recovery = true;
+  ReopenWithColumnFamilies(all_cf_names, options);
+  // Verify data
+  ReadOptions read_opts;
+  read_opts.total_order_seek = true;
+  {
+    std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts, handles_[0]));
+    iter->SeekToFirst();
+    ASSERT_FALSE(iter->Valid());
+    ASSERT_OK(iter->status());
+    iter.reset(db_->NewIterator(read_opts, handles_[1]));
+    iter->SeekToFirst();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("a", iter->key());
+    iter->Next();
+    ASSERT_FALSE(iter->Valid());
+    ASSERT_OK(iter->status());
+    iter.reset(db_->NewIterator(read_opts, handles_[2]));
+    iter->SeekToFirst();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("a", iter->key());
+    iter->Next();
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ("b", iter->key());
+    iter->Next();
+    ASSERT_FALSE(iter->Valid());
+    ASSERT_OK(iter->status());
+  }
+}
+
+TEST_F(DBBasicTest, BestEffortsRecoveryTryMultipleManifests) {
+  Options options = CurrentOptions();
+  options.env = env_;
+  DestroyAndReopen(options);
+  ASSERT_OK(Put("foo", "value0"));
+  ASSERT_OK(Flush());
+  Close();
+  {
+    // Hack by adding a new MANIFEST with high file number
+    std::string garbage(10, '\0');
+    ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/MANIFEST-001000",
+                                /*should_sync=*/true));
+  }
+  {
+    // Hack by adding a corrupted SST not referenced by any MANIFEST
+    std::string garbage(10, '\0');
+    ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/001001.sst",
+                                /*should_sync=*/true));
+  }
+
+  options.best_efforts_recovery = true;
+
+  Reopen(options);
+  ASSERT_OK(Put("bar", "value"));
+}
+
+TEST_F(DBBasicTest, RecoverWithNoCurrentFile) {
+  Options options = CurrentOptions();
+  options.env = env_;
+  DestroyAndReopen(options);
+  CreateAndReopenWithCF({"pikachu"}, options);
+  options.best_efforts_recovery = true;
+  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
+  ASSERT_EQ(2, handles_.size());
+  ASSERT_OK(Put("foo", "value"));
+  ASSERT_OK(Put(1, "bar", "value"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Flush(1));
+  Close();
+  ASSERT_OK(env_->DeleteFile(CurrentFileName(dbname_)));
+  ReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, options);
+  std::vector<std::string> cf_names;
+  ASSERT_OK(DB::ListColumnFamilies(DBOptions(options), dbname_, &cf_names));
+  ASSERT_EQ(2, cf_names.size());
+  for (const auto& name : cf_names) {
+    ASSERT_TRUE(name == kDefaultColumnFamilyName || name == "pikachu");
+  }
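  // Editor's note (not part of the upstream diff): the recovery tests in
  // this region all rely on Options::best_efforts_recovery, which opens the
  // DB at the newest point for which every referenced SST file is present,
  // dropping later updates instead of failing the open. A minimal, hedged
  // sketch of a caller opting in (hypothetical `dbname`; not from this
  // diff):
  //
  //   Options opts;
  //   opts.best_efforts_recovery = true;
  //   DB* db = nullptr;
  //   Status s = DB::Open(opts, dbname, &db);
  //   // may succeed where a normal open would return Corruption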
+} + +TEST_F(DBBasicTest, RecoverWithNoManifest) { + Options options = CurrentOptions(); + options.env = env_; + DestroyAndReopen(options); + ASSERT_OK(Put("foo", "value")); + ASSERT_OK(Flush()); + Close(); + { + // Delete all MANIFEST. + std::vector files; + ASSERT_OK(env_->GetChildren(dbname_, &files)); + for (const auto& file : files) { + uint64_t number = 0; + FileType type = kWalFile; + if (ParseFileName(file, &number, &type) && type == kDescriptorFile) { + ASSERT_OK(env_->DeleteFile(dbname_ + "/" + file)); + } + } + } + options.best_efforts_recovery = true; + options.create_if_missing = false; + Status s = TryReopen(options); + ASSERT_TRUE(s.IsInvalidArgument()); + options.create_if_missing = true; + Reopen(options); + // Since no MANIFEST exists, best-efforts recovery creates a new, empty db. + ASSERT_EQ("NOT_FOUND", Get("foo")); +} + +TEST_F(DBBasicTest, SkipWALIfMissingTableFiles) { + Options options = CurrentOptions(); + DestroyAndReopen(options); + TableFileListener* listener = new TableFileListener(); + options.listeners.emplace_back(listener); + CreateAndReopenWithCF({"pikachu"}, options); + std::vector kAllCfNames = {kDefaultColumnFamilyName, "pikachu"}; + size_t num_cfs = handles_.size(); + ASSERT_EQ(2, num_cfs); + for (int cf = 0; cf < static_cast(kAllCfNames.size()); ++cf) { + ASSERT_OK(Put(cf, "a", "0_value")); + ASSERT_OK(Flush(cf)); + ASSERT_OK(Put(cf, "b", "0_value")); + } + // Delete files + for (size_t i = 0; i < kAllCfNames.size(); ++i) { + std::vector& files = listener->GetFiles(kAllCfNames[i]); + ASSERT_EQ(1, files.size()); + for (int j = static_cast(files.size() - 1); j >= static_cast(i); + --j) { + ASSERT_OK(env_->DeleteFile(files[j])); + } + } + options.best_efforts_recovery = true; + ReopenWithColumnFamilies(kAllCfNames, options); + // Verify WAL is not applied + ReadOptions read_opts; + read_opts.total_order_seek = true; + std::unique_ptr iter(db_->NewIterator(read_opts, handles_[0])); + iter->SeekToFirst(); + ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); + iter.reset(db_->NewIterator(read_opts, handles_[1])); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ("a", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); +} + +TEST_F(DBBasicTest, DisableTrackWal) { + // If WAL tracking was enabled, and then disabled during reopen, + // the previously tracked WALs should be removed from MANIFEST. + + Options options = CurrentOptions(); + options.track_and_verify_wals_in_manifest = true; + // extremely small write buffer size, + // so that new WALs are created more frequently. + options.write_buffer_size = 100; + options.env = env_; + DestroyAndReopen(options); + for (int i = 0; i < 100; i++) { + ASSERT_OK(Put("foo" + std::to_string(i), "value" + std::to_string(i))); + } + ASSERT_OK(dbfull()->TEST_SwitchMemtable()); + ASSERT_OK(db_->SyncWAL()); + // Some WALs are tracked. + ASSERT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); + Close(); + + // Disable WAL tracking. + options.track_and_verify_wals_in_manifest = false; + options.create_if_missing = false; + ASSERT_OK(TryReopen(options)); + // Previously tracked WALs are cleared. + ASSERT_TRUE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); + Close(); + + // Re-enable WAL tracking again. 
+ options.track_and_verify_wals_in_manifest = true; + options.create_if_missing = false; + ASSERT_OK(TryReopen(options)); + ASSERT_TRUE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty()); + Close(); +} +#endif // !ROCKSDB_LITE + +TEST_F(DBBasicTest, ManifestChecksumMismatch) { + Options options = CurrentOptions(); + DestroyAndReopen(options); + ASSERT_OK(Put("bar", "value")); + ASSERT_OK(Flush()); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->SetCallBack( + "LogWriter::EmitPhysicalRecord:BeforeEncodeChecksum", [&](void* arg) { + auto* crc = reinterpret_cast(arg); + *crc = *crc + 1; + }); + SyncPoint::GetInstance()->EnableProcessing(); + + WriteOptions write_opts; + write_opts.disableWAL = true; + Status s = db_->Put(write_opts, "foo", "value"); + ASSERT_OK(s); + ASSERT_OK(Flush()); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + ASSERT_OK(Put("foo", "value1")); + ASSERT_OK(Flush()); + s = TryReopen(options); + ASSERT_TRUE(s.IsCorruption()); +} + +TEST_F(DBBasicTest, ConcurrentlyCloseDB) { + Options options = CurrentOptions(); + DestroyAndReopen(options); + std::vector workers; + for (int i = 0; i < 10; i++) { + workers.push_back(std::thread([&]() { + auto s = db_->Close(); + ASSERT_OK(s); + })); + } + for (auto& w : workers) { + w.join(); + } +} + +#ifndef ROCKSDB_LITE +class DBBasicTestTrackWal : public DBTestBase, + public testing::WithParamInterface { + public: + DBBasicTestTrackWal() + : DBTestBase("db_basic_test_track_wal", /*env_do_fsync=*/false) {} + + int CountWalFiles() { + VectorLogPtr log_files; + EXPECT_OK(dbfull()->GetSortedWalFiles(log_files)); + return static_cast(log_files.size()); + }; +}; + +TEST_P(DBBasicTestTrackWal, DoNotTrackObsoleteWal) { + // If a WAL becomes obsolete after flushing, but is not deleted from disk yet, + // then if SyncWAL is called afterwards, the obsolete WAL should not be + // tracked in MANIFEST. + + Options options = CurrentOptions(); + options.create_if_missing = true; + options.track_and_verify_wals_in_manifest = true; + options.atomic_flush = GetParam(); + + DestroyAndReopen(options); + CreateAndReopenWithCF({"cf"}, options); + ASSERT_EQ(handles_.size(), 2); // default, cf + // Do not delete WALs. + ASSERT_OK(db_->DisableFileDeletions()); + constexpr int n = 10; + std::vector> wals(n); + for (size_t i = 0; i < n; i++) { + // Generate a new WAL for each key-value. + const int cf = i % 2; + ASSERT_OK(db_->GetCurrentWalFile(&wals[i])); + ASSERT_OK(Put(cf, "k" + std::to_string(i), "v" + std::to_string(i))); + ASSERT_OK(Flush({0, 1})); + } + ASSERT_EQ(CountWalFiles(), n); + // Since all WALs are obsolete, no WAL should be tracked in MANIFEST. + ASSERT_OK(db_->SyncWAL()); + + // Manually delete all WALs. + Close(); + for (const auto& wal : wals) { + ASSERT_OK(env_->DeleteFile(LogFileName(dbname_, wal->LogNumber()))); + } + + // If SyncWAL tracks the obsolete WALs in MANIFEST, + // reopen will fail because the WALs are missing from disk. 
+ ASSERT_OK(TryReopenWithColumnFamilies({"default", "cf"}, options)); + Destroy(options); +} + +INSTANTIATE_TEST_CASE_P(DBBasicTestTrackWal, DBBasicTestTrackWal, + testing::Bool()); +#endif // ROCKSDB_LITE + +class DBBasicTestMultiGet : public DBTestBase { + public: + DBBasicTestMultiGet(std::string test_dir, int num_cfs, bool compressed_cache, + bool uncompressed_cache, bool _compression_enabled, + bool _fill_cache, uint32_t compression_parallel_threads) + : DBTestBase(test_dir, /*env_do_fsync=*/false) { + compression_enabled_ = _compression_enabled; + fill_cache_ = _fill_cache; if (compressed_cache) { std::shared_ptr cache = NewLRUCache(1048576); @@ -1760,10 +2716,17 @@ compression_types = GetSupportedCompressions(); // Not every platform may have compression libraries available, so // dynamically pick based on what's available - if (compression_types.size() == 0) { - compression_enabled_ = false; + CompressionType tmp_type = kNoCompression; + for (auto c_type : compression_types) { + if (c_type != kNoCompression) { + tmp_type = c_type; + break; + } + } + if (tmp_type != kNoCompression) { + options.compression = tmp_type; } else { - options.compression = compression_types[0]; + compression_enabled_ = false; } } #else @@ -1771,7 +2734,7 @@ if (!Snappy_Supported()) { compression_enabled_ = false; } -#endif //ROCKSDB_LITE +#endif // ROCKSDB_LITE table_options.block_cache = uncompressed_cache_; if (table_options.block_cache == nullptr) { @@ -1782,28 +2745,57 @@ table_options.block_cache_compressed = compressed_cache_; table_options.flush_block_policy_factory.reset( new MyFlushBlockPolicyFactory()); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); if (!compression_enabled_) { options.compression = kNoCompression; + } else { + options.compression_opts.parallel_threads = compression_parallel_threads; } + options_ = options; Reopen(options); + if (num_cfs > 1) { + for (int cf = 0; cf < num_cfs; ++cf) { + cf_names_.emplace_back("cf" + std::to_string(cf)); + } + CreateColumnFamilies(cf_names_, options); + cf_names_.emplace_back("default"); + } + std::string zero_str(128, '\0'); - for (int i = 0; i < 100; ++i) { - // Make the value compressible. A purely random string doesn't compress - // and the resultant data block will not be compressed - values_.emplace_back(RandomString(&rnd, 128) + zero_str); - assert(Put(Key(i), values_[i]) == Status::OK()); - } - Flush(); - - for (int i = 0; i < 100; ++i) { - // block cannot gain space by compression - uncompressable_values_.emplace_back(RandomString(&rnd, 256) + '\0'); - std::string tmp_key = "a" + Key(i); - assert(Put(tmp_key, uncompressable_values_[i]) == Status::OK()); + for (int cf = 0; cf < num_cfs; ++cf) { + for (int i = 0; i < 100; ++i) { + // Make the value compressible. A purely random string doesn't compress + // and the resultant data block will not be compressed + values_.emplace_back(rnd.RandomString(128) + zero_str); + assert(((num_cfs == 1) ? Put(Key(i), values_[i]) + : Put(cf, Key(i), values_[i])) == Status::OK()); + } + if (num_cfs == 1) { + EXPECT_OK(Flush()); + } else { + EXPECT_OK(dbfull()->Flush(FlushOptions(), handles_[cf])); + } + + for (int i = 0; i < 100; ++i) { + // block cannot gain space by compression + uncompressable_values_.emplace_back(rnd.RandomString(256) + '\0'); + std::string tmp_key = "a" + Key(i); + assert(((num_cfs == 1) ? 
Put(tmp_key, uncompressable_values_[i]) + : Put(cf, tmp_key, uncompressable_values_[i])) == + Status::OK()); + } + if (num_cfs == 1) { + EXPECT_OK(Flush()); + } else { + EXPECT_OK(dbfull()->Flush(FlushOptions(), handles_[cf])); + } + } + // Clear compressed cache, which is always pre-populated + if (compressed_cache_) { + compressed_cache_->SetCapacity(0); + compressed_cache_->SetCapacity(1048576); } - Flush(); } bool CheckValue(int i, const std::string& value) { @@ -1820,6 +2812,8 @@ return false; } + const std::vector& GetCFNames() const { return cf_names_; } + int num_lookups() { return uncompressed_cache_->num_lookups(); } int num_found() { return uncompressed_cache_->num_found(); } int num_inserts() { return uncompressed_cache_->num_inserts(); } @@ -1832,11 +2826,12 @@ bool compression_enabled() { return compression_enabled_; } bool has_compressed_cache() { return compressed_cache_ != nullptr; } bool has_uncompressed_cache() { return uncompressed_cache_ != nullptr; } + Options get_options() { return options_; } static void SetUpTestCase() {} static void TearDownTestCase() {} - private: + protected: class MyFlushBlockPolicyFactory : public FlushBlockPolicyFactory { public: MyFlushBlockPolicyFactory() {} @@ -1877,23 +2872,27 @@ const BlockBuilder& data_block_builder_; }; - class MyBlockCache : public Cache { + class MyBlockCache : public CacheWrapper { public: - explicit MyBlockCache(std::shared_ptr& target) - : target_(target), num_lookups_(0), num_found_(0), num_inserts_(0) {} - - virtual const char* Name() const override { return "MyBlockCache"; } - - virtual Status Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value), - Handle** handle = nullptr, - Priority priority = Priority::LOW) override { + explicit MyBlockCache(std::shared_ptr target) + : CacheWrapper(target), + num_lookups_(0), + num_found_(0), + num_inserts_(0) {} + + const char* Name() const override { return "MyBlockCache"; } + + using Cache::Insert; + Status Insert(const Slice& key, void* value, size_t charge, + void (*deleter)(const Slice& key, void* value), + Handle** handle = nullptr, + Priority priority = Priority::LOW) override { num_inserts_++; return target_->Insert(key, value, charge, deleter, handle, priority); } - virtual Handle* Lookup(const Slice& key, - Statistics* stats = nullptr) override { + using Cache::Lookup; + Handle* Lookup(const Slice& key, Statistics* stats = nullptr) override { num_lookups_++; Handle* handle = target_->Lookup(key, stats); if (handle != nullptr) { @@ -1901,57 +2900,6 @@ } return handle; } - - virtual bool Ref(Handle* handle) override { return target_->Ref(handle); } - - virtual bool Release(Handle* handle, bool force_erase = false) override { - return target_->Release(handle, force_erase); - } - - virtual void* Value(Handle* handle) override { - return target_->Value(handle); - } - - virtual void Erase(const Slice& key) override { target_->Erase(key); } - virtual uint64_t NewId() override { return target_->NewId(); } - - virtual void SetCapacity(size_t capacity) override { - target_->SetCapacity(capacity); - } - - virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override { - target_->SetStrictCapacityLimit(strict_capacity_limit); - } - - virtual bool HasStrictCapacityLimit() const override { - return target_->HasStrictCapacityLimit(); - } - - virtual size_t GetCapacity() const override { - return target_->GetCapacity(); - } - - virtual size_t GetUsage() const override { return target_->GetUsage(); } - - virtual size_t 
GetUsage(Handle* handle) const override { - return target_->GetUsage(handle); - } - - virtual size_t GetPinnedUsage() const override { - return target_->GetPinnedUsage(); - } - - virtual size_t GetCharge(Handle* /*handle*/) const override { return 0; } - - virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t), - bool thread_safe) override { - return target_->ApplyToAllCacheEntries(callback, thread_safe); - } - - virtual void EraseUnRefEntries() override { - return target_->EraseUnRefEntries(); - } - int num_lookups() { return num_lookups_; } int num_found() { return num_found_; } @@ -1959,7 +2907,6 @@ int num_inserts() { return num_inserts_; } private: - std::shared_ptr target_; int num_lookups_; int num_found_; int num_inserts_; @@ -1967,10 +2914,24 @@ std::shared_ptr compressed_cache_; std::shared_ptr uncompressed_cache_; + Options options_; bool compression_enabled_; std::vector values_; std::vector uncompressable_values_; bool fill_cache_; + std::vector cf_names_; +}; + +class DBBasicTestWithParallelIO + : public DBBasicTestMultiGet, + public testing::WithParamInterface< + std::tuple> { + public: + DBBasicTestWithParallelIO() + : DBBasicTestMultiGet("/db_basic_test_with_parallel_io", 1, + std::get<0>(GetParam()), std::get<1>(GetParam()), + std::get<2>(GetParam()), std::get<3>(GetParam()), + std::get<4>(GetParam())) {} }; TEST_P(DBBasicTestWithParallelIO, MultiGet) { @@ -2096,6 +3057,125 @@ } } +#ifndef ROCKSDB_LITE +TEST_P(DBBasicTestWithParallelIO, MultiGetDirectIO) { + class FakeDirectIOEnv : public EnvWrapper { + class FakeDirectIOSequentialFile; + class FakeDirectIORandomAccessFile; + + public: + FakeDirectIOEnv(Env* env) : EnvWrapper(env) {} + static const char* kClassName() { return "FakeDirectIOEnv"; } + const char* Name() const override { return kClassName(); } + + Status NewRandomAccessFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& options) override { + std::unique_ptr file; + assert(options.use_direct_reads); + EnvOptions opts = options; + opts.use_direct_reads = false; + Status s = target()->NewRandomAccessFile(fname, &file, opts); + if (!s.ok()) { + return s; + } + result->reset(new FakeDirectIORandomAccessFile(std::move(file))); + return s; + } + + private: + class FakeDirectIOSequentialFile : public SequentialFileWrapper { + public: + FakeDirectIOSequentialFile(std::unique_ptr&& file) + : SequentialFileWrapper(file.get()), file_(std::move(file)) {} + ~FakeDirectIOSequentialFile() {} + + bool use_direct_io() const override { return true; } + size_t GetRequiredBufferAlignment() const override { return 1; } + + private: + std::unique_ptr file_; + }; + + class FakeDirectIORandomAccessFile : public RandomAccessFileWrapper { + public: + FakeDirectIORandomAccessFile(std::unique_ptr&& file) + : RandomAccessFileWrapper(file.get()), file_(std::move(file)) {} + ~FakeDirectIORandomAccessFile() {} + + bool use_direct_io() const override { return true; } + size_t GetRequiredBufferAlignment() const override { return 1; } + + private: + std::unique_ptr file_; + }; + }; + + std::unique_ptr env(new FakeDirectIOEnv(env_)); + Options opts = get_options(); + opts.env = env.get(); + opts.use_direct_reads = true; + Reopen(opts); + + std::vector key_data(10); + std::vector keys; + // We cannot resize a PinnableSlice vector, so just set initial size to + // largest we think we will need + std::vector values(10); + std::vector statuses; + ReadOptions ro; + ro.fill_cache = fill_cache(); + + // Warm up the cache first + key_data.emplace_back(Key(0)); + 
keys.emplace_back(Slice(key_data.back())); + key_data.emplace_back(Key(50)); + keys.emplace_back(Slice(key_data.back())); + statuses.resize(keys.size()); + + dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), + keys.data(), values.data(), statuses.data(), true); + ASSERT_TRUE(CheckValue(0, values[0].ToString())); + ASSERT_TRUE(CheckValue(50, values[1].ToString())); + + int random_reads = env_->random_read_counter_.Read(); + key_data[0] = Key(1); + key_data[1] = Key(51); + keys[0] = Slice(key_data[0]); + keys[1] = Slice(key_data[1]); + values[0].Reset(); + values[1].Reset(); + if (uncompressed_cache_) { + uncompressed_cache_->SetCapacity(0); + uncompressed_cache_->SetCapacity(1048576); + } + dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), + keys.data(), values.data(), statuses.data(), true); + ASSERT_TRUE(CheckValue(1, values[0].ToString())); + ASSERT_TRUE(CheckValue(51, values[1].ToString())); + + bool read_from_cache = false; + if (fill_cache()) { + if (has_uncompressed_cache()) { + read_from_cache = true; + } else if (has_compressed_cache() && compression_enabled()) { + read_from_cache = true; + } + } + + int expected_reads = random_reads; + if (!compression_enabled() || !has_compressed_cache()) { + expected_reads += 2; + } else { + expected_reads += (read_from_cache ? 0 : 2); + } + if (env_->random_read_counter_.Read() != expected_reads) { + ASSERT_EQ(env_->random_read_counter_.Read(), expected_reads); + } + Close(); +} +#endif // ROCKSDB_LITE + TEST_P(DBBasicTestWithParallelIO, MultiGetWithChecksumMismatch) { std::vector key_data(10); std::vector keys; @@ -2108,13 +3188,13 @@ ro.fill_cache = fill_cache(); SyncPoint::GetInstance()->SetCallBack( - "RetrieveMultipleBlocks:VerifyChecksum", [&](void *status) { - Status* s = static_cast(status); - read_count++; - if (read_count == 2) { - *s = Status::Corruption(); - } - }); + "RetrieveMultipleBlocks:VerifyChecksum", [&](void* status) { + Status* s = static_cast(status); + read_count++; + if (read_count == 2) { + *s = Status::Corruption(); + } + }); SyncPoint::GetInstance()->EnableProcessing(); // Warm up the cache first @@ -2127,7 +3207,7 @@ dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(), keys.data(), values.data(), statuses.data(), true); ASSERT_TRUE(CheckValue(0, values[0].ToString())); - //ASSERT_TRUE(CheckValue(50, values[1].ToString())); + // ASSERT_TRUE(CheckValue(50, values[1].ToString())); ASSERT_EQ(statuses[0], Status::OK()); ASSERT_EQ(statuses[1], Status::Corruption()); @@ -2145,10 +3225,10 @@ ro.fill_cache = fill_cache(); SyncPoint::GetInstance()->SetCallBack( - "TableCache::MultiGet:FindTable", [&](void *status) { - Status* s = static_cast(status); - *s = Status::IOError(); - }); + "TableCache::MultiGet:FindTable", [&](void* status) { + Status* s = static_cast(status); + *s = Status::IOError(); + }); // DB open will create table readers unless we reduce the table cache // capacity. // SanitizeOptions will set max_open_files to minimum of 20. 
Table cache @@ -2157,10 +3237,10 @@ // prevent file open during DB open and force the file to be opened // during MultiGet SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void *arg) { - int* max_open_files = (int*)arg; - *max_open_files = 11; - }); + "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { + int* max_open_files = (int*)arg; + *max_open_files = 11; + }); SyncPoint::GetInstance()->EnableProcessing(); Reopen(CurrentOptions()); @@ -2180,362 +3260,645 @@ SyncPoint::GetInstance()->DisableProcessing(); } -INSTANTIATE_TEST_CASE_P( - ParallelIO, DBBasicTestWithParallelIO, - // Params are as follows - - // Param 0 - Compressed cache enabled - // Param 1 - Uncompressed cache enabled - // Param 2 - Data compression enabled - // Param 3 - ReadOptions::fill_cache - ::testing::Combine(::testing::Bool(), ::testing::Bool(), - ::testing::Bool(), ::testing::Bool())); - -class DBBasicTestWithTimestampBase : public DBTestBase { - public: - explicit DBBasicTestWithTimestampBase(const std::string& dbname) - : DBTestBase(dbname) {} +INSTANTIATE_TEST_CASE_P(ParallelIO, DBBasicTestWithParallelIO, + // Params are as follows - + // Param 0 - Compressed cache enabled + // Param 1 - Uncompressed cache enabled + // Param 2 - Data compression enabled + // Param 3 - ReadOptions::fill_cache + // Param 4 - CompressionOptions::parallel_threads + ::testing::Combine(::testing::Bool(), ::testing::Bool(), + ::testing::Bool(), ::testing::Bool(), + ::testing::Values(1, 4))); - protected: - class TestComparatorBase : public Comparator { - public: - explicit TestComparatorBase(size_t ts_sz) : Comparator(ts_sz) {} +// Forward declaration +class DeadlineFS; - const char* Name() const override { return "TestComparator"; } +class DeadlineRandomAccessFile : public FSRandomAccessFileOwnerWrapper { + public: + DeadlineRandomAccessFile(DeadlineFS& fs, + std::unique_ptr& file) + : FSRandomAccessFileOwnerWrapper(std::move(file)), fs_(fs) {} + + IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts, + Slice* result, char* scratch, + IODebugContext* dbg) const override; - void FindShortSuccessor(std::string*) const override {} + IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, + const IOOptions& options, IODebugContext* dbg) override; - void FindShortestSeparator(std::string*, const Slice&) const override {} + private: + DeadlineFS& fs_; + std::unique_ptr file_; +}; - int Compare(const Slice& a, const Slice& b) const override { - int r = CompareWithoutTimestamp(a, b); - if (r != 0 || 0 == timestamp_size()) { - return r; +class DeadlineFS : public FileSystemWrapper { + public: + // The error_on_delay parameter specifies whether a IOStatus::TimedOut() + // status should be returned after delaying the IO to exceed the timeout, + // or to simply delay but return success anyway. 
The latter mimics the + // behavior of PosixFileSystem, which does not enforce any timeout + explicit DeadlineFS(SpecialEnv* env, bool error_on_delay) + : FileSystemWrapper(env->GetFileSystem()), + deadline_(std::chrono::microseconds::zero()), + io_timeout_(std::chrono::microseconds::zero()), + env_(env), + timedout_(false), + ignore_deadline_(false), + error_on_delay_(error_on_delay) {} + + static const char* kClassName() { return "DeadlineFileSystem"; } + const char* Name() const override { return kClassName(); } + + IOStatus NewRandomAccessFile(const std::string& fname, + const FileOptions& opts, + std::unique_ptr* result, + IODebugContext* dbg) override { + std::unique_ptr file; + IOStatus s = target()->NewRandomAccessFile(fname, opts, &file, dbg); + EXPECT_OK(s); + result->reset(new DeadlineRandomAccessFile(*this, file)); + + const std::chrono::microseconds deadline = GetDeadline(); + const std::chrono::microseconds io_timeout = GetIOTimeout(); + if (deadline.count() || io_timeout.count()) { + AssertDeadline(deadline, io_timeout, opts.io_options); + } + return ShouldDelay(opts.io_options); + } + + // Set a vector of {IO counter, delay in microseconds, return status} tuples + // that control when to inject a delay and duration of the delay + void SetDelayTrigger(const std::chrono::microseconds deadline, + const std::chrono::microseconds io_timeout, + const int trigger) { + delay_trigger_ = trigger; + io_count_ = 0; + deadline_ = deadline; + io_timeout_ = io_timeout; + timedout_ = false; + } + + // Increment the IO counter and return a delay in microseconds + IOStatus ShouldDelay(const IOOptions& opts) { + if (timedout_) { + return IOStatus::TimedOut(); + } else if (!deadline_.count() && !io_timeout_.count()) { + return IOStatus::OK(); + } + if (!ignore_deadline_ && delay_trigger_ == io_count_++) { + env_->SleepForMicroseconds(static_cast(opts.timeout.count() + 1)); + timedout_ = true; + if (error_on_delay_) { + return IOStatus::TimedOut(); } - return CompareTimestamp( - Slice(a.data() + a.size() - timestamp_size(), timestamp_size()), - Slice(b.data() + b.size() - timestamp_size(), timestamp_size())); } + return IOStatus::OK(); + } - virtual int CompareImpl(const Slice& a, const Slice& b) const = 0; + const std::chrono::microseconds GetDeadline() { + return ignore_deadline_ ? std::chrono::microseconds::zero() : deadline_; + } - int CompareWithoutTimestamp(const Slice& a, const Slice& b) const override { - assert(a.size() >= timestamp_size()); - assert(b.size() >= timestamp_size()); - Slice k1 = StripTimestampFromUserKey(a, timestamp_size()); - Slice k2 = StripTimestampFromUserKey(b, timestamp_size()); + const std::chrono::microseconds GetIOTimeout() { + return ignore_deadline_ ? 
std::chrono::microseconds::zero() : io_timeout_; + } - return CompareImpl(k1, k2); - } + bool TimedOut() { return timedout_; } - int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override { - if (!ts1.data() && !ts2.data()) { - return 0; - } else if (ts1.data() && !ts2.data()) { - return 1; - } else if (!ts1.data() && ts2.data()) { - return -1; - } - assert(ts1.size() == ts2.size()); - uint64_t low1 = 0; - uint64_t low2 = 0; - uint64_t high1 = 0; - uint64_t high2 = 0; - auto* ptr1 = const_cast(&ts1); - auto* ptr2 = const_cast(&ts2); - if (!GetFixed64(ptr1, &low1) || !GetFixed64(ptr1, &high1) || - !GetFixed64(ptr2, &low2) || !GetFixed64(ptr2, &high2)) { - assert(false); - } - if (high1 < high2) { - return 1; - } else if (high1 > high2) { - return -1; - } - if (low1 < low2) { - return 1; - } else if (low1 > low2) { - return -1; + void IgnoreDeadline(bool ignore) { ignore_deadline_ = ignore; } + + void AssertDeadline(const std::chrono::microseconds deadline, + const std::chrono::microseconds io_timeout, + const IOOptions& opts) const { + // Give a leeway of +- 10us as it can take some time for the Get/ + // MultiGet call to reach here, in order to avoid false alarms + std::chrono::microseconds now = + std::chrono::microseconds(env_->NowMicros()); + std::chrono::microseconds timeout; + if (deadline.count()) { + timeout = deadline - now; + if (io_timeout.count()) { + timeout = std::min(timeout, io_timeout); } - return 0; + } else { + timeout = io_timeout; + } + if (opts.timeout != timeout) { + ASSERT_EQ(timeout, opts.timeout); } - }; - - Slice EncodeTimestamp(uint64_t low, uint64_t high, std::string* ts) { - assert(nullptr != ts); - ts->clear(); - PutFixed64(ts, low); - PutFixed64(ts, high); - assert(ts->size() == sizeof(low) + sizeof(high)); - return Slice(*ts); } + + private: + // The number of IOs to trigger the delay after + int delay_trigger_; + // Current IO count + int io_count_; + // ReadOptions deadline for the Get/MultiGet/Iterator + std::chrono::microseconds deadline_; + // ReadOptions io_timeout for the Get/MultiGet/Iterator + std::chrono::microseconds io_timeout_; + SpecialEnv* env_; + // Flag to indicate whether we injected a delay + bool timedout_; + // Temporarily ignore deadlines/timeouts + bool ignore_deadline_; + // Return IOStatus::TimedOut() or IOStatus::OK() + bool error_on_delay_; }; -class DBBasicTestWithTimestamp : public DBBasicTestWithTimestampBase { +IOStatus DeadlineRandomAccessFile::Read(uint64_t offset, size_t len, + const IOOptions& opts, Slice* result, + char* scratch, + IODebugContext* dbg) const { + const std::chrono::microseconds deadline = fs_.GetDeadline(); + const std::chrono::microseconds io_timeout = fs_.GetIOTimeout(); + IOStatus s; + if (deadline.count() || io_timeout.count()) { + fs_.AssertDeadline(deadline, io_timeout, opts); + } + if (s.ok()) { + s = FSRandomAccessFileWrapper::Read(offset, len, opts, result, scratch, + dbg); + } + if (s.ok()) { + s = fs_.ShouldDelay(opts); + } + return s; +} + +IOStatus DeadlineRandomAccessFile::MultiRead(FSReadRequest* reqs, + size_t num_reqs, + const IOOptions& options, + IODebugContext* dbg) { + const std::chrono::microseconds deadline = fs_.GetDeadline(); + const std::chrono::microseconds io_timeout = fs_.GetIOTimeout(); + IOStatus s; + if (deadline.count() || io_timeout.count()) { + fs_.AssertDeadline(deadline, io_timeout, options); + } + if (s.ok()) { + s = FSRandomAccessFileWrapper::MultiRead(reqs, num_reqs, options, dbg); + } + if (s.ok()) { + s = fs_.ShouldDelay(options); + } + return s; +} + 
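// Editor's note (not part of the upstream diff): DeadlineFS above delays a
// chosen IO past ReadOptions::deadline / ReadOptions::io_timeout so the
// tests can check that Get/MultiGet give up with Status::TimedOut(). A
// hedged caller-side sketch (hypothetical db/cf/keys; not from this diff):
//
//   ReadOptions ro;
//   ro.deadline = std::chrono::microseconds(env->NowMicros() + 10000);
//   ro.io_timeout = std::chrono::microseconds(5000);
//   db->MultiGet(ro, cf, num_keys, keys, values, statuses, false);
//   // keys not served in time come back with Status::TimedOut()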
+// A test class for intercepting random reads and injecting artificial +// delays. Used for testing the MultiGet deadline feature +class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet { public: - DBBasicTestWithTimestamp() - : DBBasicTestWithTimestampBase("/db_basic_test_with_timestamp") {} - - protected: - class TestComparator : public TestComparatorBase { - public: - const int kKeyPrefixLength = - 3; // 3: length of "key" in generated keys ("key" + std::to_string(j)) - explicit TestComparator(size_t ts_sz) : TestComparatorBase(ts_sz) {} - - int CompareImpl(const Slice& a, const Slice& b) const override { - int n1 = atoi( - std::string(a.data() + kKeyPrefixLength, a.size() - kKeyPrefixLength) - .c_str()); - int n2 = atoi( - std::string(b.data() + kKeyPrefixLength, b.size() - kKeyPrefixLength) - .c_str()); - return (n1 < n2) ? -1 : (n1 > n2) ? 1 : 0; + DBBasicTestMultiGetDeadline() + : DBBasicTestMultiGet( + "db_basic_test_multiget_deadline" /*Test dir*/, + 10 /*# of column families*/, false /*compressed cache enabled*/, + true /*uncompressed cache enabled*/, true /*compression enabled*/, + true /*ReadOptions.fill_cache*/, + 1 /*# of parallel compression threads*/) {} + + inline void CheckStatus(std::vector& statuses, size_t num_ok) { + for (size_t i = 0; i < statuses.size(); ++i) { + if (i < num_ok) { + EXPECT_OK(statuses[i]); + } else { + if (statuses[i] != Status::TimedOut()) { + EXPECT_EQ(statuses[i], Status::TimedOut()); + } + } } - }; + } }; -#ifndef ROCKSDB_LITE -// A class which remembers the name of each flushed file. -class FlushedFileCollector : public EventListener { - public: - FlushedFileCollector() {} - ~FlushedFileCollector() override {} +TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) { + std::shared_ptr fs = std::make_shared(env_, false); + std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); + Options options = CurrentOptions(); - void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override { - InstrumentedMutexLock lock(&mutex_); - flushed_files_.push_back(info.file_path); + std::shared_ptr cache = NewLRUCache(1048576); + BlockBasedTableOptions table_options; + table_options.block_cache = cache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.env = env.get(); + SetTimeElapseOnlySleepOnReopen(&options); + ReopenWithColumnFamilies(GetCFNames(), options); + + // Test the non-batched version of MultiGet with multiple column + // families + std::vector key_str; + size_t i; + for (i = 0; i < 5; ++i) { + key_str.emplace_back(Key(static_cast(i))); + } + std::vector cfs(key_str.size()); + ; + std::vector keys(key_str.size()); + std::vector values(key_str.size()); + for (i = 0; i < key_str.size(); ++i) { + cfs[i] = handles_[i]; + keys[i] = Slice(key_str[i].data(), key_str[i].size()); } - std::vector GetFlushedFiles() { - std::vector result; - { - InstrumentedMutexLock lock(&mutex_); - result = flushed_files_; - } - return result; + ReadOptions ro; + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + // Delay the first IO + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 0); + + std::vector statuses = dbfull()->MultiGet(ro, cfs, keys, &values); + // The first key is successful because we check after the lookup, but + // subsequent keys fail due to deadline exceeded + CheckStatus(statuses, 1); + + // Clear the cache + cache->SetCapacity(0); + cache->SetCapacity(1048576); + // Test non-batched Multiget with multiple column families and + // introducing an IO delay in one of the middle CFs + 
key_str.clear(); + for (i = 0; i < 10; ++i) { + key_str.emplace_back(Key(static_cast(i))); + } + cfs.resize(key_str.size()); + keys.resize(key_str.size()); + values.resize(key_str.size()); + for (i = 0; i < key_str.size(); ++i) { + // 2 keys per CF + cfs[i] = handles_[i / 2]; + keys[i] = Slice(key_str[i].data(), key_str[i].size()); + } + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 1); + statuses = dbfull()->MultiGet(ro, cfs, keys, &values); + CheckStatus(statuses, 3); + + // Test batched MultiGet with an IO delay in the first data block read. + // Both keys in the first CF should succeed as they're in the same data + // block and would form one batch, and we check for deadline between + // batches. + std::vector pin_values(keys.size()); + cache->SetCapacity(0); + cache->SetCapacity(1048576); + statuses.clear(); + statuses.resize(keys.size()); + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 0); + dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), + pin_values.data(), statuses.data()); + CheckStatus(statuses, 2); + + // Similar to the previous one, but an IO delay in the third CF data block + // read + for (PinnableSlice& value : pin_values) { + value.Reset(); + } + cache->SetCapacity(0); + cache->SetCapacity(1048576); + statuses.clear(); + statuses.resize(keys.size()); + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 2); + dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), + pin_values.data(), statuses.data()); + CheckStatus(statuses, 6); + + // Similar to the previous one, but an IO delay in the last but one CF + for (PinnableSlice& value : pin_values) { + value.Reset(); + } + cache->SetCapacity(0); + cache->SetCapacity(1048576); + statuses.clear(); + statuses.resize(keys.size()); + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 3); + dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(), + pin_values.data(), statuses.data()); + CheckStatus(statuses, 8); + + // Test batched MultiGet with single CF and lots of keys. Inject delay + // into the second batch of keys. 
+ // As each batch is 32 keys, the first 64 keys, i.e. the first two + // batches, should succeed and the rest should time out. + for (PinnableSlice& value : pin_values) { + value.Reset(); + } + cache->SetCapacity(0); + cache->SetCapacity(1048576); + key_str.clear(); + for (i = 0; i < 100; ++i) { + key_str.emplace_back(Key(static_cast<int>(i))); + } + keys.resize(key_str.size()); + pin_values.clear(); + pin_values.resize(key_str.size()); + for (i = 0; i < key_str.size(); ++i) { + keys[i] = Slice(key_str[i].data(), key_str[i].size()); + } + statuses.clear(); + statuses.resize(keys.size()); + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 1); + dbfull()->MultiGet(ro, handles_[0], keys.size(), keys.data(), + pin_values.data(), statuses.data()); + CheckStatus(statuses, 64); + Close(); +} - void ClearFlushedFiles() { - InstrumentedMutexLock lock(&mutex_); - flushed_files_.clear(); +TEST_F(DBBasicTest, ManifestWriteFailure) { + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.disable_auto_compactions = true; + options.env = env_; + DestroyAndReopen(options); + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(Flush()); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->SetCallBack( + "VersionSet::ProcessManifestWrites:AfterSyncManifest", [&](void* arg) { + ASSERT_NE(nullptr, arg); + auto* s = reinterpret_cast<Status*>(arg); + ASSERT_OK(*s); + // Manually overwrite the return status + *s = Status::IOError(); + }); + SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put("key", "value")); + ASSERT_NOK(Flush()); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->EnableProcessing(); + Reopen(options); +} + +TEST_F(DBBasicTest, DestroyDefaultCfHandle) { + Options options = GetDefaultOptions(); + options.create_if_missing = true; + DestroyAndReopen(options); + CreateAndReopenWithCF({"pikachu"}, options); + for (const auto* h : handles_) { + ASSERT_NE(db_->DefaultColumnFamily(), h); + } - private: - std::vector<std::string> flushed_files_; - InstrumentedMutex mutex_; -}; + // We have two handles to the default column family. The two handles point to + // different ColumnFamilyHandle objects. + assert(db_->DefaultColumnFamily()); + ASSERT_EQ(0U, db_->DefaultColumnFamily()->GetID()); + assert(handles_[0]); + ASSERT_EQ(0U, handles_[0]->GetID()); + + // You can destroy handles_[...]. + for (auto* h : handles_) { + ASSERT_OK(db_->DestroyColumnFamilyHandle(h)); + } + handles_.clear(); + + // But you should not destroy db_->DefaultColumnFamily(), since it's going to + // be deleted in `DBImpl::CloseHelper()`. Before that, it may be used + // elsewhere internally too.
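+ // Destroying the default handle is therefore expected to be rejected, + // which the assertion below checks via Status::IsInvalidArgument().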
+ ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily(); + ASSERT_TRUE(db_->DestroyColumnFamilyHandle(default_cf).IsInvalidArgument()); +} -TEST_F(DBBasicTestWithTimestamp, PutAndGetWithCompaction) { - const int kNumKeysPerFile = 8192; - const size_t kNumTimestamps = 2; - const size_t kNumKeysPerTimestamp = (kNumKeysPerFile - 1) / kNumTimestamps; - const size_t kSplitPosBase = kNumKeysPerTimestamp / 2; - Options options = CurrentOptions(); +#ifndef ROCKSDB_LITE +TEST_F(DBBasicTest, VerifyFileChecksums) { + Options options = GetDefaultOptions(); options.create_if_missing = true; options.env = env_; - options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); + DestroyAndReopen(options); + ASSERT_OK(Put("a", "value")); + ASSERT_OK(Flush()); + ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsInvalidArgument()); - FlushedFileCollector* collector = new FlushedFileCollector(); - options.listeners.emplace_back(collector); + options.file_checksum_gen_factory = GetFileChecksumGenCrc32cFactory(); + Reopen(options); + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); - std::string tmp; - size_t ts_sz = EncodeTimestamp(0, 0, &tmp).size(); - TestComparator test_cmp(ts_sz); - options.comparator = &test_cmp; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy( - 10 /*bits_per_key*/, false /*use_block_based_builder*/)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(2, num_cfs); - std::vector<std::string> write_ts_strs(kNumTimestamps); - std::vector<std::string> read_ts_strs(kNumTimestamps); - std::vector<Slice> write_ts_list; - std::vector<Slice> read_ts_list; - - for (size_t i = 0; i != kNumTimestamps; ++i) { - write_ts_list.emplace_back(EncodeTimestamp(i * 2, 0, &write_ts_strs[i])); - read_ts_list.emplace_back(EncodeTimestamp(1 + i * 2, 0, &read_ts_strs[i])); - const Slice& write_ts = write_ts_list.back(); - WriteOptions wopts; - wopts.timestamp = &write_ts; - for (int cf = 0; cf != static_cast<int>(num_cfs); ++cf) { - for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { - ASSERT_OK(Put(cf, "key" + std::to_string(j), - "value_" + std::to_string(j) + "_" + std::to_string(i), - wopts)); - if (j == kSplitPosBase + i || j == kNumKeysPerTimestamp - 1) { - // flush all keys with the same timestamp to two sst files, split at - // incremental positions such that lowerlevel[1].smallest.userkey == - // higherlevel[0].largest.userkey - ASSERT_OK(Flush(cf)); - - // compact files (2 at each level) to a lower level such that all keys - // with the same timestamp are at one level, with newer versions at - // higher levels. - CompactionOptions compact_opt; - compact_opt.compression = kNoCompression; - db_->CompactFiles(compact_opt, handles_[cf], - collector->GetFlushedFiles(), - static_cast<int>(kNumTimestamps - i)); - collector->ClearFlushedFiles(); - } - } - } - } - const auto& verify_db_func = [&]() { - for (size_t i = 0; i != kNumTimestamps; ++i) { - ReadOptions ropts; - ropts.timestamp = &read_ts_list[i]; - for (int cf = 0; cf != static_cast<int>(num_cfs); ++cf) { - ColumnFamilyHandle* cfh = handles_[cf]; - for (size_t j = 0; j != kNumKeysPerTimestamp; ++j) { - std::string value; - ASSERT_OK(db_->Get(ropts, cfh, "key" + std::to_string(j), &value)); - ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), - value); - } - } + // Write an L0 with checksum computed.
+ ASSERT_OK(Put("b", "value")); + ASSERT_OK(Flush()); + + ASSERT_OK(db_->VerifyFileChecksums(ReadOptions())); + + // Does the right thing but with the wrong name -- using it should lead to an + // error. + class MisnamedFileChecksumGenerator : public FileChecksumGenCrc32c { + public: + MisnamedFileChecksumGenerator(const FileChecksumGenContext& context) + : FileChecksumGenCrc32c(context) {} + + const char* Name() const override { return "sha1"; } + }; + + class MisnamedFileChecksumGenFactory : public FileChecksumGenCrc32cFactory { + public: + std::unique_ptr CreateFileChecksumGenerator( + const FileChecksumGenContext& context) override { + return std::unique_ptr( + new MisnamedFileChecksumGenerator(context)); } }; - verify_db_func(); + + options.file_checksum_gen_factory.reset(new MisnamedFileChecksumGenFactory()); + Reopen(options); + ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsInvalidArgument()); } #endif // !ROCKSDB_LITE -class DBBasicTestWithTimestampWithParam - : public DBBasicTestWithTimestampBase, - public testing::WithParamInterface { - public: - DBBasicTestWithTimestampWithParam() - : DBBasicTestWithTimestampBase( - "/db_basic_test_with_timestamp_with_param") {} +// A test class for intercepting random reads and injecting artificial +// delays. Used for testing the deadline/timeout feature +class DBBasicTestDeadline + : public DBBasicTest, + public testing::WithParamInterface> {}; + +TEST_P(DBBasicTestDeadline, PointLookupDeadline) { + std::shared_ptr fs = std::make_shared(env_, true); + std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); + bool set_deadline = std::get<0>(GetParam()); + bool set_timeout = std::get<1>(GetParam()); + + for (int option_config = kDefault; option_config < kEnd; ++option_config) { + if (ShouldSkipOptions(option_config, kSkipPlainTable | kSkipMmapReads)) { + continue; + } + option_config_ = option_config; + Options options = CurrentOptions(); + if (options.use_direct_reads) { + continue; + } + options.env = env.get(); + options.disable_auto_compactions = true; + Cache* block_cache = nullptr; + // Fileter block reads currently don't cause the request to get + // aborted on a read timeout, so its possible those block reads + // may get issued even if the deadline is past + SyncPoint::GetInstance()->SetCallBack( + "BlockBasedTable::Get:BeforeFilterMatch", + [&](void* /*arg*/) { fs->IgnoreDeadline(true); }); + SyncPoint::GetInstance()->SetCallBack( + "BlockBasedTable::Get:AfterFilterMatch", + [&](void* /*arg*/) { fs->IgnoreDeadline(false); }); + // DB open will create table readers unless we reduce the table cache + // capacity. + // SanitizeOptions will set max_open_files to minimum of 20. Table cache + // is allocated with max_open_files - 10 as capacity. So override + // max_open_files to 11 so table cache capacity will become 1. 
This will + // prevent file open during DB open and force the file to be opened + // during the point lookup + SyncPoint::GetInstance()->SetCallBack( + "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { + int* max_open_files = (int*)arg; + *max_open_files = 11; + }); + SyncPoint::GetInstance()->EnableProcessing(); - protected: - class TestComparator : public TestComparatorBase { - private: - const Comparator* cmp_without_ts_; + SetTimeElapseOnlySleepOnReopen(&options); + Reopen(options); - public: - explicit TestComparator(size_t ts_sz) - : TestComparatorBase(ts_sz), cmp_without_ts_(nullptr) { - cmp_without_ts_ = BytewiseComparator(); + if (options.table_factory) { + block_cache = options.table_factory->GetOptions<Cache>( + TableFactory::kBlockCacheOpts()); + } - int CompareImpl(const Slice& a, const Slice& b) const override { - return cmp_without_ts_->Compare(a, b); + Random rnd(301); + for (int i = 0; i < 400; ++i) { + std::string key = "k" + ToString(i); + ASSERT_OK(Put(key, rnd.RandomString(100))); + } - }; -}; + ASSERT_OK(Flush()); -TEST_P(DBBasicTestWithTimestampWithParam, PutAndGet) { - const int kNumKeysPerFile = 8192; - const size_t kNumTimestamps = 6; - bool memtable_only = GetParam(); - Options options = CurrentOptions(); - options.create_if_missing = true; - options.env = env_; - options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); - std::string tmp; - size_t ts_sz = EncodeTimestamp(0, 0, &tmp).size(); - TestComparator test_cmp(ts_sz); - options.comparator = &test_cmp; - BlockBasedTableOptions bbto; - bbto.filter_policy.reset(NewBloomFilterPolicy( - 10 /*bits_per_key*/, false /*use_block_based_builder*/)); - bbto.whole_key_filtering = true; - options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + bool timedout = true; + // A timeout will be forced when the IO counter reaches this value + int io_deadline_trigger = 0; + // Keep incrementing io_deadline_trigger and call Get() until there is an + // iteration that doesn't cause a timeout. This ensures that we cover + // all file reads in the point lookup path that can potentially time out + // and cause the Get() to fail.
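+ // Each pass of the loop below arms the delay on a successively later IO + // (io_deadline_trigger = 0, 1, 2, ...) and clears the block cache so the + // same reads are re-issued from the file; the loop exits on the first pass + // in which no read hits the delay, i.e. once every IO in the lookup path + // has been exercised.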
+ while (timedout) { + ReadOptions ro; + if (set_deadline) { + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + } + if (set_timeout) { + ro.io_timeout = std::chrono::microseconds{5000}; + } + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, io_deadline_trigger); - std::vector<CompressionType> compression_types; - compression_types.push_back(kNoCompression); - if (Zlib_Supported()) { - compression_types.push_back(kZlibCompression); - } -#if LZ4_VERSION_NUMBER >= 10400 // r124+ - compression_types.push_back(kLZ4Compression); - compression_types.push_back(kLZ4HCCompression); -#endif // LZ4_VERSION_NUMBER >= 10400 - if (ZSTD_Supported()) { - compression_types.push_back(kZSTD); - } - - // Switch compression dictionary on/off to check key extraction - // correctness in kBuffered state - std::vector<uint32_t> max_dict_bytes_list = {0, 1 << 14}; // 0 or 16KB - - for (auto compression_type : compression_types) { - for (uint32_t max_dict_bytes : max_dict_bytes_list) { - options.compression = compression_type; - options.compression_opts.max_dict_bytes = max_dict_bytes; - if (compression_type == kZSTD) { - options.compression_opts.zstd_max_train_bytes = max_dict_bytes; - } - options.target_file_size_base = 1 << 26; // 64MB - - DestroyAndReopen(options); - CreateAndReopenWithCF({"pikachu"}, options); - size_t num_cfs = handles_.size(); - ASSERT_EQ(2, num_cfs); - std::vector<std::string> write_ts_strs(kNumTimestamps); - std::vector<std::string> read_ts_strs(kNumTimestamps); - std::vector<Slice> write_ts_list; - std::vector<Slice> read_ts_list; - - for (size_t i = 0; i != kNumTimestamps; ++i) { - write_ts_list.emplace_back( - EncodeTimestamp(i * 2, 0, &write_ts_strs[i])); - read_ts_list.emplace_back( - EncodeTimestamp(1 + i * 2, 0, &read_ts_strs[i])); - const Slice& write_ts = write_ts_list.back(); - WriteOptions wopts; - wopts.timestamp = &write_ts; - for (int cf = 0; cf != static_cast<int>(num_cfs); ++cf) { - for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; ++j) { - ASSERT_OK(Put( - cf, "key" + std::to_string(j), - "value_" + std::to_string(j) + "_" + std::to_string(i), wopts)); - } - if (!memtable_only) { - ASSERT_OK(Flush(cf)); - } - } + block_cache->SetCapacity(0); + block_cache->SetCapacity(1048576); + + std::string value; + Status s = dbfull()->Get(ro, "k50", &value); + if (fs->TimedOut()) { + ASSERT_EQ(s, Status::TimedOut()); + } else { + timedout = false; + ASSERT_OK(s); } - const auto& verify_db_func = [&]() { - for (size_t i = 0; i != kNumTimestamps; ++i) { - ReadOptions ropts; - ropts.timestamp = &read_ts_list[i]; - for (int cf = 0; cf != static_cast<int>(num_cfs); ++cf) { - ColumnFamilyHandle* cfh = handles_[cf]; - for (size_t j = 0; j != (kNumKeysPerFile - 1) / kNumTimestamps; - ++j) { - std::string value; - ASSERT_OK( - db_->Get(ropts, cfh, "key" + std::to_string(j), &value)); - ASSERT_EQ("value_" + std::to_string(j) + "_" + std::to_string(i), - value); - } - } - } - }; - verify_db_func(); + io_deadline_trigger++; } + // Reset the delay sequence in order to avoid false alarms during Reopen + fs->SetDelayTrigger(std::chrono::microseconds::zero(), + std::chrono::microseconds::zero(), 0); } + Close(); } -INSTANTIATE_TEST_CASE_P(Timestamp, DBBasicTestWithTimestampWithParam, - ::testing::Bool()); +TEST_P(DBBasicTestDeadline, IteratorDeadline) { + std::shared_ptr<DeadlineFS> fs = std::make_shared<DeadlineFS>(env_, true); + std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs)); + bool set_deadline = std::get<0>(GetParam()); + bool set_timeout = std::get<1>(GetParam()); + + for (int option_config = kDefault; option_config < kEnd; ++option_config) {
+ if (ShouldSkipOptions(option_config, kSkipPlainTable | kSkipMmapReads)) { + continue; + } + Options options = CurrentOptions(); + if (options.use_direct_reads) { + continue; + } + options.env = env.get(); + options.disable_auto_compactions = true; + Cache* block_cache = nullptr; + // DB open will create table readers unless we reduce the table cache + // capacity. + // SanitizeOptions will set max_open_files to a minimum of 20. Table cache + // is allocated with max_open_files - 10 as capacity. So override + // max_open_files to 11 so table cache capacity will become 1. This will + // prevent file open during DB open and force the file to be opened + // during iteration + SyncPoint::GetInstance()->SetCallBack( + "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) { + int* max_open_files = (int*)arg; + *max_open_files = 11; + }); + SyncPoint::GetInstance()->EnableProcessing(); -} // namespace ROCKSDB_NAMESPACE + SetTimeElapseOnlySleepOnReopen(&options); + Reopen(options); + + if (options.table_factory) { + block_cache = options.table_factory->GetOptions<Cache>( + TableFactory::kBlockCacheOpts()); + } + + Random rnd(301); + for (int i = 0; i < 400; ++i) { + std::string key = "k" + ToString(i); + ASSERT_OK(Put(key, rnd.RandomString(100))); + } + ASSERT_OK(Flush()); + + bool timedout = true; + // A timeout will be forced when the IO counter reaches this value + int io_deadline_trigger = 0; + // Keep incrementing io_deadline_trigger and re-run the scan until there + // is a pass that doesn't cause a timeout. This ensures that we cover + // all file reads in the iterator path that can potentially time out + while (timedout) { + ReadOptions ro; + if (set_deadline) { + ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000}; + } + if (set_timeout) { + ro.io_timeout = std::chrono::microseconds{5000}; + } + fs->SetDelayTrigger(ro.deadline, ro.io_timeout, io_deadline_trigger); -#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS -extern "C" { -void RegisterCustomObjects(int argc, char** argv); + block_cache->SetCapacity(0); + block_cache->SetCapacity(1048576); + + Iterator* iter = dbfull()->NewIterator(ro); + int count = 0; + iter->Seek("k50"); + while (iter->Valid() && count++ < 100) { + iter->Next(); + } + if (fs->TimedOut()) { + ASSERT_FALSE(iter->Valid()); + ASSERT_EQ(iter->status(), Status::TimedOut()); + } else { + timedout = false; + ASSERT_OK(iter->status()); + } + delete iter; + io_deadline_trigger++; + } + // Reset the delay sequence in order to avoid false alarms during Reopen + fs->SetDelayTrigger(std::chrono::microseconds::zero(), + std::chrono::microseconds::zero(), 0); + } + Close(); } -#else -void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {} -#endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS + +// Param 0: If true, set read_options.deadline +// Param 1: If true, set read_options.io_timeout +INSTANTIATE_TEST_CASE_P(DBBasicTestDeadline, DBBasicTestDeadline, + ::testing::Values(std::make_tuple(true, false), + std::make_tuple(false, true), + std::make_tuple(true, true))); +} // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_blob_index_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_blob_index_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_blob_index_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_blob_index_test.cc 1970-01-01 00:00:00.000000000 +0000 @@
-1,436 +0,0 @@ -// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). -// -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include -#include - -#include "db/arena_wrapped_db_iter.h" -#include "db/column_family.h" -#include "db/db_iter.h" -#include "db/db_test_util.h" -#include "db/dbformat.h" -#include "db/write_batch_internal.h" -#include "port/port.h" -#include "port/stack_trace.h" -#include "util/string_util.h" -#include "utilities/merge_operators.h" - -namespace ROCKSDB_NAMESPACE { - -// kTypeBlobIndex is a value type used by BlobDB only. The base rocksdb -// should accept the value type on write, and report not supported value -// for reads, unless caller request for it explicitly. The base rocksdb -// doesn't understand format of actual blob index (the value). -class DBBlobIndexTest : public DBTestBase { - public: - enum Tier { - kMemtable = 0, - kImmutableMemtables = 1, - kL0SstFile = 2, - kLnSstFile = 3, - }; - const std::vector kAllTiers = {Tier::kMemtable, - Tier::kImmutableMemtables, - Tier::kL0SstFile, Tier::kLnSstFile}; - - DBBlobIndexTest() : DBTestBase("/db_blob_index_test") {} - - ColumnFamilyHandle* cfh() { return dbfull()->DefaultColumnFamily(); } - - ColumnFamilyData* cfd() { - return reinterpret_cast(cfh())->cfd(); - } - - Status PutBlobIndex(WriteBatch* batch, const Slice& key, - const Slice& blob_index) { - return WriteBatchInternal::PutBlobIndex(batch, cfd()->GetID(), key, - blob_index); - } - - Status Write(WriteBatch* batch) { - return dbfull()->Write(WriteOptions(), batch); - } - - std::string GetImpl(const Slice& key, bool* is_blob_index = nullptr, - const Snapshot* snapshot = nullptr) { - ReadOptions read_options; - read_options.snapshot = snapshot; - PinnableSlice value; - DBImpl::GetImplOptions get_impl_options; - get_impl_options.column_family = cfh(); - get_impl_options.value = &value; - get_impl_options.is_blob_index = is_blob_index; - auto s = dbfull()->GetImpl(read_options, key, get_impl_options); - if (s.IsNotFound()) { - return "NOT_FOUND"; - } - if (s.IsNotSupported()) { - return "NOT_SUPPORTED"; - } - if (!s.ok()) { - return s.ToString(); - } - return value.ToString(); - } - - std::string GetBlobIndex(const Slice& key, - const Snapshot* snapshot = nullptr) { - bool is_blob_index = false; - std::string value = GetImpl(key, &is_blob_index, snapshot); - if (!is_blob_index) { - return "NOT_BLOB"; - } - return value; - } - - ArenaWrappedDBIter* GetBlobIterator() { - return dbfull()->NewIteratorImpl( - ReadOptions(), cfd(), dbfull()->GetLatestSequenceNumber(), - nullptr /*read_callback*/, true /*allow_blob*/); - } - - Options GetTestOptions() { - Options options; - options.create_if_missing = true; - options.num_levels = 2; - options.disable_auto_compactions = true; - // Disable auto flushes. 
- options.max_write_buffer_number = 10; - options.min_write_buffer_number_to_merge = 10; - options.merge_operator = MergeOperators::CreateStringAppendOperator(); - return options; - } - - void MoveDataTo(Tier tier) { - switch (tier) { - case Tier::kMemtable: - break; - case Tier::kImmutableMemtables: - ASSERT_OK(dbfull()->TEST_SwitchMemtable()); - break; - case Tier::kL0SstFile: - ASSERT_OK(Flush()); - break; - case Tier::kLnSstFile: - ASSERT_OK(Flush()); - ASSERT_OK(Put("a", "dummy")); - ASSERT_OK(Put("z", "dummy")); - ASSERT_OK(Flush()); - ASSERT_OK( - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); -#ifndef ROCKSDB_LITE - ASSERT_EQ("0,1", FilesPerLevel()); -#endif // !ROCKSDB_LITE - break; - } - } -}; - -// Should be able to write kTypeBlobIndex to memtables and SST files. -TEST_F(DBBlobIndexTest, Write) { - for (auto tier : kAllTiers) { - DestroyAndReopen(GetTestOptions()); - for (int i = 1; i <= 5; i++) { - std::string index = ToString(i); - WriteBatch batch; - ASSERT_OK(PutBlobIndex(&batch, "key" + index, "blob" + index)); - ASSERT_OK(Write(&batch)); - } - MoveDataTo(tier); - for (int i = 1; i <= 5; i++) { - std::string index = ToString(i); - ASSERT_EQ("blob" + index, GetBlobIndex("key" + index)); - } - } -} - -// Get should be able to return blob index if is_blob_index is provided, -// otherwise return Status::NotSupported status. -TEST_F(DBBlobIndexTest, Get) { - for (auto tier : kAllTiers) { - DestroyAndReopen(GetTestOptions()); - WriteBatch batch; - ASSERT_OK(batch.Put("key", "value")); - ASSERT_OK(PutBlobIndex(&batch, "blob_key", "blob_index")); - ASSERT_OK(Write(&batch)); - MoveDataTo(tier); - // Verify normal value - bool is_blob_index = false; - PinnableSlice value; - ASSERT_EQ("value", Get("key")); - ASSERT_EQ("value", GetImpl("key")); - ASSERT_EQ("value", GetImpl("key", &is_blob_index)); - ASSERT_FALSE(is_blob_index); - // Verify blob index - ASSERT_TRUE(Get("blob_key", &value).IsNotSupported()); - ASSERT_EQ("NOT_SUPPORTED", GetImpl("blob_key")); - ASSERT_EQ("blob_index", GetImpl("blob_key", &is_blob_index)); - ASSERT_TRUE(is_blob_index); - } -} - -// Get should NOT return Status::NotSupported if blob index is updated with -// a normal value. -TEST_F(DBBlobIndexTest, Updated) { - for (auto tier : kAllTiers) { - DestroyAndReopen(GetTestOptions()); - WriteBatch batch; - for (int i = 0; i < 10; i++) { - ASSERT_OK(PutBlobIndex(&batch, "key" + ToString(i), "blob_index")); - } - ASSERT_OK(Write(&batch)); - // Avoid blob values from being purged. 
- const Snapshot* snapshot = dbfull()->GetSnapshot(); - ASSERT_OK(Put("key1", "new_value")); - ASSERT_OK(Merge("key2", "a")); - ASSERT_OK(Merge("key2", "b")); - ASSERT_OK(Merge("key2", "c")); - ASSERT_OK(Delete("key3")); - ASSERT_OK(SingleDelete("key4")); - ASSERT_OK(Delete("key5")); - ASSERT_OK(Merge("key5", "a")); - ASSERT_OK(Merge("key5", "b")); - ASSERT_OK(Merge("key5", "c")); - ASSERT_OK(dbfull()->DeleteRange(WriteOptions(), cfh(), "key6", "key9")); - MoveDataTo(tier); - for (int i = 0; i < 10; i++) { - ASSERT_EQ("blob_index", GetBlobIndex("key" + ToString(i), snapshot)); - } - ASSERT_EQ("new_value", Get("key1")); - ASSERT_EQ("NOT_SUPPORTED", GetImpl("key2")); - ASSERT_EQ("NOT_FOUND", Get("key3")); - ASSERT_EQ("NOT_FOUND", Get("key4")); - ASSERT_EQ("a,b,c", GetImpl("key5")); - for (int i = 6; i < 9; i++) { - ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i))); - } - ASSERT_EQ("blob_index", GetBlobIndex("key9")); - dbfull()->ReleaseSnapshot(snapshot); - } -} - -// Iterator should get blob value if allow_blob flag is set, -// otherwise return Status::NotSupported status. -TEST_F(DBBlobIndexTest, Iterate) { - const std::vector> data = { - /*00*/ {kTypeValue}, - /*01*/ {kTypeBlobIndex}, - /*02*/ {kTypeValue}, - /*03*/ {kTypeBlobIndex, kTypeValue}, - /*04*/ {kTypeValue}, - /*05*/ {kTypeValue, kTypeBlobIndex}, - /*06*/ {kTypeValue}, - /*07*/ {kTypeDeletion, kTypeBlobIndex}, - /*08*/ {kTypeValue}, - /*09*/ {kTypeSingleDeletion, kTypeBlobIndex}, - /*10*/ {kTypeValue}, - /*11*/ {kTypeMerge, kTypeMerge, kTypeMerge, kTypeBlobIndex}, - /*12*/ {kTypeValue}, - /*13*/ - {kTypeMerge, kTypeMerge, kTypeMerge, kTypeDeletion, kTypeBlobIndex}, - /*14*/ {kTypeValue}, - /*15*/ {kTypeBlobIndex}, - /*16*/ {kTypeValue}, - }; - - auto get_key = [](int index) { - char buf[20]; - snprintf(buf, sizeof(buf), "%02d", index); - return "key" + std::string(buf); - }; - - auto get_value = [&](int index, int version) { - return get_key(index) + "_value" + ToString(version); - }; - - auto check_iterator = [&](Iterator* iterator, Status::Code expected_status, - const Slice& expected_value) { - ASSERT_EQ(expected_status, iterator->status().code()); - if (expected_status == Status::kOk) { - ASSERT_TRUE(iterator->Valid()); - ASSERT_EQ(expected_value, iterator->value()); - } else { - ASSERT_FALSE(iterator->Valid()); - } - }; - - auto create_normal_iterator = [&]() -> Iterator* { - return dbfull()->NewIterator(ReadOptions()); - }; - - auto create_blob_iterator = [&]() -> Iterator* { return GetBlobIterator(); }; - - auto check_is_blob = [&](bool is_blob) { - return [is_blob](Iterator* iterator) { - ASSERT_EQ(is_blob, - reinterpret_cast(iterator)->IsBlob()); - }; - }; - - auto verify = [&](int index, Status::Code expected_status, - const Slice& forward_value, const Slice& backward_value, - std::function create_iterator, - std::function extra_check = nullptr) { - // Seek - auto* iterator = create_iterator(); - ASSERT_OK(iterator->Refresh()); - iterator->Seek(get_key(index)); - check_iterator(iterator, expected_status, forward_value); - if (extra_check) { - extra_check(iterator); - } - delete iterator; - - // Next - iterator = create_iterator(); - ASSERT_OK(iterator->Refresh()); - iterator->Seek(get_key(index - 1)); - ASSERT_TRUE(iterator->Valid()); - iterator->Next(); - check_iterator(iterator, expected_status, forward_value); - if (extra_check) { - extra_check(iterator); - } - delete iterator; - - // SeekForPrev - iterator = create_iterator(); - ASSERT_OK(iterator->Refresh()); - iterator->SeekForPrev(get_key(index)); - 
check_iterator(iterator, expected_status, backward_value); - if (extra_check) { - extra_check(iterator); - } - delete iterator; - - // Prev - iterator = create_iterator(); - iterator->Seek(get_key(index + 1)); - ASSERT_TRUE(iterator->Valid()); - iterator->Prev(); - check_iterator(iterator, expected_status, backward_value); - if (extra_check) { - extra_check(iterator); - } - delete iterator; - }; - - for (auto tier : {Tier::kMemtable} /*kAllTiers*/) { - // Avoid values from being purged. - std::vector snapshots; - DestroyAndReopen(GetTestOptions()); - - // fill data - for (int i = 0; i < static_cast(data.size()); i++) { - for (int j = static_cast(data[i].size()) - 1; j >= 0; j--) { - std::string key = get_key(i); - std::string value = get_value(i, j); - WriteBatch batch; - switch (data[i][j]) { - case kTypeValue: - ASSERT_OK(Put(key, value)); - break; - case kTypeDeletion: - ASSERT_OK(Delete(key)); - break; - case kTypeSingleDeletion: - ASSERT_OK(SingleDelete(key)); - break; - case kTypeMerge: - ASSERT_OK(Merge(key, value)); - break; - case kTypeBlobIndex: - ASSERT_OK(PutBlobIndex(&batch, key, value)); - ASSERT_OK(Write(&batch)); - break; - default: - assert(false); - }; - } - snapshots.push_back(dbfull()->GetSnapshot()); - } - ASSERT_OK( - dbfull()->DeleteRange(WriteOptions(), cfh(), get_key(15), get_key(16))); - snapshots.push_back(dbfull()->GetSnapshot()); - MoveDataTo(tier); - - // Normal iterator - verify(1, Status::kNotSupported, "", "", create_normal_iterator); - verify(3, Status::kNotSupported, "", "", create_normal_iterator); - verify(5, Status::kOk, get_value(5, 0), get_value(5, 0), - create_normal_iterator); - verify(7, Status::kOk, get_value(8, 0), get_value(6, 0), - create_normal_iterator); - verify(9, Status::kOk, get_value(10, 0), get_value(8, 0), - create_normal_iterator); - verify(11, Status::kNotSupported, "", "", create_normal_iterator); - verify(13, Status::kOk, - get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), - get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), - create_normal_iterator); - verify(15, Status::kOk, get_value(16, 0), get_value(14, 0), - create_normal_iterator); - - // Iterator with blob support - verify(1, Status::kOk, get_value(1, 0), get_value(1, 0), - create_blob_iterator, check_is_blob(true)); - verify(3, Status::kOk, get_value(3, 0), get_value(3, 0), - create_blob_iterator, check_is_blob(true)); - verify(5, Status::kOk, get_value(5, 0), get_value(5, 0), - create_blob_iterator, check_is_blob(false)); - verify(7, Status::kOk, get_value(8, 0), get_value(6, 0), - create_blob_iterator, check_is_blob(false)); - verify(9, Status::kOk, get_value(10, 0), get_value(8, 0), - create_blob_iterator, check_is_blob(false)); - verify(11, Status::kNotSupported, "", "", create_blob_iterator); - verify(13, Status::kOk, - get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), - get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), - create_blob_iterator, check_is_blob(false)); - verify(15, Status::kOk, get_value(16, 0), get_value(14, 0), - create_blob_iterator, check_is_blob(false)); - -#ifndef ROCKSDB_LITE - // Iterator with blob support and using seek. 
- ASSERT_OK(dbfull()->SetOptions( - cfh(), {{"max_sequential_skip_in_iterations", "0"}})); - verify(1, Status::kOk, get_value(1, 0), get_value(1, 0), - create_blob_iterator, check_is_blob(true)); - verify(3, Status::kOk, get_value(3, 0), get_value(3, 0), - create_blob_iterator, check_is_blob(true)); - verify(5, Status::kOk, get_value(5, 0), get_value(5, 0), - create_blob_iterator, check_is_blob(false)); - verify(7, Status::kOk, get_value(8, 0), get_value(6, 0), - create_blob_iterator, check_is_blob(false)); - verify(9, Status::kOk, get_value(10, 0), get_value(8, 0), - create_blob_iterator, check_is_blob(false)); - verify(11, Status::kNotSupported, "", "", create_blob_iterator); - verify(13, Status::kOk, - get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), - get_value(13, 2) + "," + get_value(13, 1) + "," + get_value(13, 0), - create_blob_iterator, check_is_blob(false)); - verify(15, Status::kOk, get_value(16, 0), get_value(14, 0), - create_blob_iterator, check_is_blob(false)); -#endif // !ROCKSDB_LITE - - for (auto* snapshot : snapshots) { - dbfull()->ReleaseSnapshot(snapshot); - } - } -} - -} // namespace ROCKSDB_NAMESPACE - -int main(int argc, char** argv) { - ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_block_cache_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_block_cache_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_block_cache_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_block_cache_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -7,10 +7,21 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include +#include +#include + +#include "cache/cache_entry_roles.h" #include "cache/lru_cache.h" +#include "db/column_family.h" #include "db/db_test_util.h" #include "port/stack_trace.h" +#include "rocksdb/persistent_cache.h" +#include "rocksdb/statistics.h" +#include "rocksdb/table.h" #include "util/compression.h" +#include "util/defer.h" +#include "util/random.h" +#include "utilities/fault_injection_fs.h" namespace ROCKSDB_NAMESPACE { @@ -32,7 +43,8 @@ const size_t kNumBlocks = 10; const size_t kValueSize = 100; - DBBlockCacheTest() : DBTestBase("/db_block_cache_test") {} + DBBlockCacheTest() + : DBTestBase("db_block_cache_test", /*env_do_fsync=*/true) {} BlockBasedTableOptions GetTableOptions() { BlockBasedTableOptions table_options; @@ -47,7 +59,7 @@ options.avoid_flush_during_recovery = false; // options.compression = kNoCompression; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); return options; } @@ -144,6 +156,19 @@ compressed_insert_count_ = new_insert_count; compressed_failure_count_ = new_failure_count; } + +#ifndef ROCKSDB_LITE + const std::array GetCacheEntryRoleCountsBg() { + // Verify in cache entry role stats + ColumnFamilyHandleImpl* cfh = + static_cast(dbfull()->DefaultColumnFamily()); + InternalStats* internal_stats_ptr = cfh->cfd()->internal_stats(); + InternalStats::CacheEntryRoleStats stats; + internal_stats_ptr->TEST_GetCacheEntryRoleStats(&stats, + /*foreground=*/false); + return stats.entry_counts; + } +#endif // ROCKSDB_LITE }; TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) { @@ -153,9 +178,15 @@ auto options = GetOptions(table_options); InitTable(options); - std::shared_ptr cache = NewLRUCache(0, 0, false); + LRUCacheOptions co; + co.capacity = 0; + co.num_shard_bits = 0; + co.strict_capacity_limit = false; + // Needed not to count entry stats collector + co.metadata_charge_policy = kDontChargeCacheMetadata; + std::shared_ptr cache = NewLRUCache(co); table_options.block_cache = cache; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); RecordCacheCounters(options); @@ -177,9 +208,15 @@ auto options = GetOptions(table_options); InitTable(options); - std::shared_ptr cache = NewLRUCache(0, 0, false); + LRUCacheOptions co; + co.capacity = 0; + co.num_shard_bits = 0; + co.strict_capacity_limit = false; + // Needed not to count entry stats collector + co.metadata_charge_policy = kDontChargeCacheMetadata; + std::shared_ptr cache = NewLRUCache(co); table_options.block_cache = cache; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); RecordCacheCounters(options); @@ -187,7 +224,7 @@ Iterator* iter = nullptr; // Load blocks into cache. - for (size_t i = 0; i < kNumBlocks - 1; i++) { + for (size_t i = 0; i + 1 < kNumBlocks; i++) { iter = db_->NewIterator(read_options); iter->Seek(ToString(i)); ASSERT_OK(iter->status()); @@ -209,12 +246,12 @@ iter = nullptr; // Release iterators and access cache again. 
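+    // (The bound i + 1 < kNumBlocks is equivalent to i < kNumBlocks - 1 for +    // the kNumBlocks used here, but avoids the unsigned wrap-around the +    // subtraction would cause if kNumBlocks were ever zero.)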
- for (size_t i = 0; i < kNumBlocks - 1; i++) { + for (size_t i = 0; i + 1 < kNumBlocks; i++) { iterators[i].reset(); CheckCacheCounters(options, 0, 0, 0, 0); } ASSERT_EQ(0, cache->GetPinnedUsage()); - for (size_t i = 0; i < kNumBlocks - 1; i++) { + for (size_t i = 0; i + 1 < kNumBlocks; i++) { iter = db_->NewIterator(read_options); iter->Seek(ToString(i)); ASSERT_OK(iter->status()); @@ -225,34 +262,54 @@ #ifdef SNAPPY TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) { - ReadOptions read_options; - auto table_options = GetTableOptions(); - auto options = GetOptions(table_options); + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + + BlockBasedTableOptions table_options; + table_options.no_block_cache = true; + table_options.block_cache_compressed = nullptr; + table_options.block_size = 1; + table_options.filter_policy.reset(NewBloomFilterPolicy(20)); + table_options.cache_index_and_filter_blocks = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); options.compression = CompressionType::kSnappyCompression; - InitTable(options); - std::shared_ptr cache = NewLRUCache(0, 0, false); + DestroyAndReopen(options); + + std::string value(kValueSize, 'a'); + for (size_t i = 0; i < kNumBlocks; i++) { + ASSERT_OK(Put(ToString(i), value)); + ASSERT_OK(Flush()); + } + + ReadOptions read_options; std::shared_ptr compressed_cache = NewLRUCache(1 << 25, 0, false); + LRUCacheOptions co; + co.capacity = 0; + co.num_shard_bits = 0; + co.strict_capacity_limit = false; + // Needed not to count entry stats collector + co.metadata_charge_policy = kDontChargeCacheMetadata; + std::shared_ptr cache = NewLRUCache(co); table_options.block_cache = cache; + table_options.no_block_cache = false; table_options.block_cache_compressed = compressed_cache; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + table_options.max_auto_readahead_size = 0; + table_options.cache_index_and_filter_blocks = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); RecordCacheCounters(options); - std::vector> iterators(kNumBlocks - 1); - Iterator* iter = nullptr; - // Load blocks into cache. for (size_t i = 0; i < kNumBlocks - 1; i++) { - iter = db_->NewIterator(read_options); - iter->Seek(ToString(i)); - ASSERT_OK(iter->status()); + ASSERT_EQ(value, Get(ToString(i))); CheckCacheCounters(options, 1, 0, 1, 0); CheckCompressedCacheCounters(options, 1, 0, 1, 0); - iterators[i].reset(iter); } + size_t usage = cache->GetUsage(); - ASSERT_LT(0, usage); + ASSERT_EQ(0, usage); ASSERT_EQ(usage, cache->GetPinnedUsage()); size_t compressed_usage = compressed_cache->GetUsage(); ASSERT_LT(0, compressed_usage); @@ -264,24 +321,158 @@ cache->SetCapacity(usage); cache->SetStrictCapacityLimit(true); ASSERT_EQ(usage, cache->GetPinnedUsage()); - iter = db_->NewIterator(read_options); - iter->Seek(ToString(kNumBlocks - 1)); - ASSERT_TRUE(iter->status().IsIncomplete()); + + // Load last key block. + ASSERT_EQ("Result incomplete: Insert failed due to LRU cache being full.", + Get(ToString(kNumBlocks - 1))); + // Failure will also record the miss counter. CheckCacheCounters(options, 1, 0, 0, 1); CheckCompressedCacheCounters(options, 1, 0, 1, 0); - delete iter; - iter = nullptr; // Clear strict capacity limit flag. This time we shall hit compressed block - // cache. + // cache and load into block cache. 
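+  // With the strict limit relaxed, the uncompressed block can be inserted +  // again, so the Get() below should be satisfied from the compressed cache +  // (one compressed-cache hit, no new compressed-cache insert) while +  // repopulating the uncompressed block cache.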
cache->SetStrictCapacityLimit(false); - iter = db_->NewIterator(read_options); - iter->Seek(ToString(kNumBlocks - 1)); - ASSERT_OK(iter->status()); + // Load last key block. + ASSERT_EQ(value, Get(ToString(kNumBlocks - 1))); CheckCacheCounters(options, 1, 0, 1, 0); CheckCompressedCacheCounters(options, 0, 1, 0, 0); - delete iter; - iter = nullptr; +} + +namespace { +class PersistentCacheFromCache : public PersistentCache { + public: + PersistentCacheFromCache(std::shared_ptr cache, bool read_only) + : cache_(cache), read_only_(read_only) {} + + Status Insert(const Slice& key, const char* data, + const size_t size) override { + if (read_only_) { + return Status::NotSupported(); + } + std::unique_ptr copy{new char[size]}; + std::copy_n(data, size, copy.get()); + Status s = cache_->Insert( + key, copy.get(), size, + GetCacheEntryDeleterForRole()); + if (s.ok()) { + copy.release(); + } + return s; + } + + Status Lookup(const Slice& key, std::unique_ptr* data, + size_t* size) override { + auto handle = cache_->Lookup(key); + if (handle) { + char* ptr = static_cast(cache_->Value(handle)); + *size = cache_->GetCharge(handle); + data->reset(new char[*size]); + std::copy_n(ptr, *size, data->get()); + cache_->Release(handle); + return Status::OK(); + } else { + return Status::NotFound(); + } + } + + bool IsCompressed() override { return false; } + + StatsType Stats() override { return StatsType(); } + + std::string GetPrintableOptions() const override { return ""; } + + uint64_t NewId() override { return cache_->NewId(); } + + private: + std::shared_ptr cache_; + bool read_only_; +}; + +class ReadOnlyCacheWrapper : public CacheWrapper { + using CacheWrapper::CacheWrapper; + + using Cache::Insert; + Status Insert(const Slice& /*key*/, void* /*value*/, size_t /*charge*/, + void (*)(const Slice& key, void* value) /*deleter*/, + Handle** /*handle*/, Priority /*priority*/) override { + return Status::NotSupported(); + } +}; + +} // namespace + +TEST_F(DBBlockCacheTest, TestWithSameCompressed) { + auto table_options = GetTableOptions(); + auto options = GetOptions(table_options); + InitTable(options); + + std::shared_ptr rw_cache{NewLRUCache(1000000)}; + std::shared_ptr rw_pcache{ + new PersistentCacheFromCache(rw_cache, /*read_only*/ false)}; + // Exercise some obscure behavior with read-only wrappers + std::shared_ptr ro_cache{new ReadOnlyCacheWrapper(rw_cache)}; + std::shared_ptr ro_pcache{ + new PersistentCacheFromCache(rw_cache, /*read_only*/ true)}; + + // Simple same pointer + table_options.block_cache = rw_cache; + table_options.block_cache_compressed = rw_cache; + table_options.persistent_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache same as block_cache_compressed not " + "currently supported, and would be bad for performance anyway"); + + // Other cases + table_options.block_cache = ro_cache; + table_options.block_cache_compressed = rw_cache; + table_options.persistent_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache and block_cache_compressed share " + "the same key space, which is not supported"); + + table_options.block_cache = rw_cache; + table_options.block_cache_compressed = ro_cache; + table_options.persistent_cache.reset(); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: 
block_cache_compressed and block_cache share " + "the same key space, which is not supported"); + + table_options.block_cache = ro_cache; + table_options.block_cache_compressed.reset(); + table_options.persistent_cache = rw_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache and persistent_cache share the same " + "key space, which is not supported"); + + table_options.block_cache = rw_cache; + table_options.block_cache_compressed.reset(); + table_options.persistent_cache = ro_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: persistent_cache and block_cache share the same " + "key space, which is not supported"); + + table_options.block_cache.reset(); + table_options.no_block_cache = true; + table_options.block_cache_compressed = ro_cache; + table_options.persistent_cache = rw_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: block_cache_compressed and persistent_cache " + "share the same key space, which is not supported"); + + table_options.block_cache.reset(); + table_options.no_block_cache = true; + table_options.block_cache_compressed = rw_cache; + table_options.persistent_cache = ro_pcache; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + ASSERT_EQ(TryReopen(options).ToString(), + "Invalid argument: persistent_cache and block_cache_compressed " + "share the same key space, which is not supported"); } #endif // SNAPPY @@ -296,7 +487,7 @@ BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = true; table_options.filter_policy.reset(NewBloomFilterPolicy(20)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "key", "val")); @@ -352,7 +543,7 @@ std::shared_ptr cache = NewLRUCache(10, 0, true); table_options.block_cache = cache; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); Reopen(options); ASSERT_OK(Put("key1", "val1")); ASSERT_OK(Put("key2", "val2")); @@ -390,7 +581,7 @@ std::shared_ptr cache = NewLRUCache(co); table_options.block_cache = cache; table_options.filter_policy.reset(NewBloomFilterPolicy(20, true)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, options); ASSERT_OK(Put(1, "longer_key", "val")); @@ -429,6 +620,183 @@ // filter_bytes_insert); } +#if (defined OS_LINUX || defined OS_WIN) +TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + + BlockBasedTableOptions table_options; + table_options.block_cache = NewLRUCache(1 << 25, 0, false); + table_options.cache_index_and_filter_blocks = false; + table_options.prepopulate_block_cache = + BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + + std::string value(kValueSize, 'a'); + for (size_t i = 1; i <= kNumBlocks; i++) { + 
ASSERT_OK(Put(ToString(i), value)); + ASSERT_OK(Flush()); + ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); + ASSERT_EQ(value, Get(ToString(i))); + ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS)); + ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT)); + } + // Verify compaction not counted + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr, + /*end=*/nullptr)); + EXPECT_EQ(kNumBlocks, + options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); +} + +// This test caches data, index and filter blocks during flush. +class DBBlockCacheTest1 : public DBTestBase, + public ::testing::WithParamInterface<uint32_t> { + public: + const size_t kNumBlocks = 10; + const size_t kValueSize = 100; + DBBlockCacheTest1() : DBTestBase("db_block_cache_test1", true) {} +}; + +INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1, + ::testing::Values(1, 2, 3)); + +TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + + BlockBasedTableOptions table_options; + table_options.block_cache = NewLRUCache(1 << 25, 0, false); + + uint32_t filter_type = GetParam(); + switch (filter_type) { + case 1: // partition_filter + table_options.partition_filters = true; + table_options.index_type = + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + break; + case 2: // block-based filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + break; + case 3: // full filter + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + break; + default: + assert(false); + } + + table_options.cache_index_and_filter_blocks = true; + table_options.prepopulate_block_cache = + BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + + std::string value(kValueSize, 'a'); + for (size_t i = 1; i <= kNumBlocks; i++) { + ASSERT_OK(Put(ToString(i), value)); + ASSERT_OK(Flush()); + ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); + if (filter_type == 1) { + ASSERT_EQ(2 * i, + options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); + ASSERT_EQ(2 * i, + options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); + } else { + ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); + ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); + } + ASSERT_EQ(value, Get(ToString(i))); + + ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS)); + ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT)); + + ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT)); + if (filter_type == 1) { + ASSERT_EQ(i * 3, + options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT)); + } else { + ASSERT_EQ(i * 2, + options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT)); + } + ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS)); + } + + // Verify compaction not counted + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr, + /*end=*/nullptr)); + EXPECT_EQ(kNumBlocks, + options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); + // Index and filter blocks are automatically warmed when the new table file + //
is automatically opened at the end of compaction. This is not easily + // disabled so results in the new index and filter blocks being warmed. + if (filter_type == 1) { + EXPECT_EQ(2 * (1 + kNumBlocks), + options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); + EXPECT_EQ(2 * (1 + kNumBlocks), + options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); + } else { + EXPECT_EQ(1 + kNumBlocks, + options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); + EXPECT_EQ(1 + kNumBlocks, + options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); + } +} + +TEST_F(DBBlockCacheTest, DynamicallyWarmCacheDuringFlush) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + + BlockBasedTableOptions table_options; + table_options.block_cache = NewLRUCache(1 << 25, 0, false); + table_options.cache_index_and_filter_blocks = false; + table_options.prepopulate_block_cache = + BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly; + + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + + std::string value(kValueSize, 'a'); + + for (size_t i = 1; i <= 5; i++) { + ASSERT_OK(Put(ToString(i), value)); + ASSERT_OK(Flush()); + ASSERT_EQ(1, + options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); + + ASSERT_EQ(value, Get(ToString(i))); + ASSERT_EQ(0, + options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); + ASSERT_EQ( + 0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS)); + ASSERT_EQ(1, + options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT)); + } + + ASSERT_OK(dbfull()->SetOptions( + {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}})); + + for (size_t i = 6; i <= kNumBlocks; i++) { + ASSERT_OK(Put(ToString(i), value)); + ASSERT_OK(Flush()); + ASSERT_EQ(0, + options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); + + ASSERT_EQ(value, Get(ToString(i))); + ASSERT_EQ(1, + options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD)); + ASSERT_EQ( + 1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS)); + ASSERT_EQ(0, + options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT)); + } +} +#endif + namespace { // A mock cache wraps LRUCache, and record how many entries have been @@ -443,15 +811,18 @@ false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/) { } - Status Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value), Handle** handle, - Priority priority) override { + using ShardedCache::Insert; + + Status Insert(const Slice& key, void* value, + const Cache::CacheItemHelper* helper_cb, size_t charge, + Handle** handle, Priority priority) override { + DeleterFn delete_cb = helper_cb->del_cb; if (priority == Priority::LOW) { low_pri_insert_count++; } else { high_pri_insert_count++; } - return LRUCache::Insert(key, value, charge, deleter, handle, priority); + return LRUCache::Insert(key, value, charge, delete_cb, handle, priority); } }; @@ -471,7 +842,7 @@ table_options.filter_policy.reset(NewBloomFilterPolicy(20)); table_options.cache_index_and_filter_blocks_with_high_priority = priority == Cache::Priority::HIGH ? 
true : false; - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); DestroyAndReopen(options); MockCache::high_pri_insert_count = 0; @@ -517,6 +888,140 @@ } } +namespace { + +// An LRUCache wrapper that can falsely report "not found" on Lookup. +// This allows us to manipulate BlockBasedTableReader into thinking +// another thread inserted the data in between Lookup and Insert, +// while mostly preserving the LRUCache interface/behavior. +class LookupLiarCache : public CacheWrapper { + int nth_lookup_not_found_ = 0; + + public: + explicit LookupLiarCache(std::shared_ptr target) + : CacheWrapper(std::move(target)) {} + + using Cache::Lookup; + Handle* Lookup(const Slice& key, Statistics* stats) override { + if (nth_lookup_not_found_ == 1) { + nth_lookup_not_found_ = 0; + return nullptr; + } + if (nth_lookup_not_found_ > 1) { + --nth_lookup_not_found_; + } + return CacheWrapper::Lookup(key, stats); + } + + // 1 == next lookup, 2 == after next, etc. + void SetNthLookupNotFound(int n) { nth_lookup_not_found_ = n; } +}; + +} // anonymous namespace + +TEST_F(DBBlockCacheTest, AddRedundantStats) { + const size_t capacity = size_t{1} << 25; + const int num_shard_bits = 0; // 1 shard + int iterations_tested = 0; + for (std::shared_ptr base_cache : + {NewLRUCache(capacity, num_shard_bits), + NewClockCache(capacity, num_shard_bits)}) { + if (!base_cache) { + // Skip clock cache when not supported + continue; + } + ++iterations_tested; + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + + std::shared_ptr cache = + std::make_shared(base_cache); + + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + table_options.block_cache = cache; + table_options.filter_policy.reset(NewBloomFilterPolicy(50)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + + // Create a new table. + ASSERT_OK(Put("foo", "value")); + ASSERT_OK(Put("bar", "value")); + ASSERT_OK(Flush()); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + + // Normal access filter+index+data. 
+ ASSERT_EQ("value", Get("foo")); + + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); + // -------- + ASSERT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); + // -------- + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); + + // Againt access filter+index+data, but force redundant load+insert on index + cache->SetNthLookupNotFound(2); + ASSERT_EQ("value", Get("bar")); + + ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); + // -------- + ASSERT_EQ(4, TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); + ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); + // -------- + ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); + + // Access just filter (with high probability), and force redundant + // load+insert + cache->SetNthLookupNotFound(1); + ASSERT_EQ("NOT_FOUND", Get("this key was not added")); + + EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); + EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); + EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); + // -------- + EXPECT_EQ(5, TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); + EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); + EXPECT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); + // -------- + EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); + + // Access just data, forcing redundant load+insert + ReadOptions read_options; + std::unique_ptr iter{db_->NewIterator(read_options)}; + cache->SetNthLookupNotFound(1); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), "bar"); + + EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD)); + EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD)); + EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD)); + // -------- + EXPECT_EQ(6, TestGetTickerCount(options, BLOCK_CACHE_ADD)); + + EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT)); + EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT)); + EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT)); + // -------- + EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT)); + } + EXPECT_GE(iterations_tested, 1); +} + TEST_F(DBBlockCacheTest, ParanoidFileChecks) { Options options = CurrentOptions(); options.create_if_missing = true; @@ -526,7 +1031,7 @@ BlockBasedTableOptions table_options; table_options.cache_index_and_filter_blocks = false; table_options.filter_policy.reset(NewBloomFilterPolicy(20)); - options.table_factory.reset(new BlockBasedTableFactory(table_options)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); CreateAndReopenWithCF({"pikachu"}, 
 
   ASSERT_OK(Put(1, "1_key", "val"));
@@ -541,7 +1046,7 @@
   // Create a new SST file. This will further trigger a compaction
   // and generate another file.
   ASSERT_OK(Flush(1));
-  dbfull()->TEST_WaitForCompact();
+  ASSERT_OK(dbfull()->TEST_WaitForCompact());
   ASSERT_EQ(3, /* Totally 3 files created up to now */
             TestGetTickerCount(options, BLOCK_CACHE_ADD));
@@ -556,7 +1061,7 @@
   ASSERT_OK(Put(1, "1_key4", "val4"));
   ASSERT_OK(Put(1, "9_key4", "val4"));
   ASSERT_OK(Flush(1));
-  dbfull()->TEST_WaitForCompact();
+  ASSERT_OK(dbfull()->TEST_WaitForCompact());
   ASSERT_EQ(3, /* Totally 3 files created up to now */
             TestGetTickerCount(options, BLOCK_CACHE_ADD));
 }
@@ -631,7 +1136,7 @@
   std::string str;
   for (int i = 0; i < num_iter; i++) {
     if (i % 4 == 0) {  // high compression ratio
-      str = RandomString(&rnd, 1000);
+      str = rnd.RandomString(1000);
     }
     values.push_back(str);
     ASSERT_OK(Put(1, Key(i), values[i]));
@@ -701,8 +1206,9 @@
   Random rnd(301);
   for (auto compression_type : compression_types) {
     Options options = CurrentOptions();
-    options.compression = compression_type;
-    options.compression_opts.max_dict_bytes = 4096;
+    options.bottommost_compression = compression_type;
+    options.bottommost_compression_opts.max_dict_bytes = 4096;
+    options.bottommost_compression_opts.enabled = true;
     options.create_if_missing = true;
     options.num_levels = 2;
     options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
@@ -710,7 +1216,7 @@
     BlockBasedTableOptions table_options;
     table_options.cache_index_and_filter_blocks = true;
     table_options.block_cache.reset(new MockCache());
-    options.table_factory.reset(new BlockBasedTableFactory(table_options));
+    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);
 
     RecordCacheCountersForCompressionDict(options);
@@ -718,12 +1224,12 @@
     for (int i = 0; i < kNumFiles; ++i) {
       ASSERT_EQ(i, NumTableFilesAtLevel(0, 0));
       for (int j = 0; j < kNumEntriesPerFile; ++j) {
-        std::string value = RandomString(&rnd, kNumBytesPerEntry);
+        std::string value = rnd.RandomString(kNumBytesPerEntry);
         ASSERT_OK(Put(Key(j * kNumFiles + i), value.c_str()));
       }
       ASSERT_OK(Flush());
     }
-    dbfull()->TEST_WaitForCompact();
+    ASSERT_OK(dbfull()->TEST_WaitForCompact());
     ASSERT_EQ(0, NumTableFilesAtLevel(0));
     ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(1));
 
@@ -750,8 +1256,628 @@
   }
 }
 
+static void ClearCache(Cache* cache) {
+  auto roles = CopyCacheDeleterRoleMap();
+  std::deque<std::string> keys;
+  Cache::ApplyToAllEntriesOptions opts;
+  auto callback = [&](const Slice& key, void* /*value*/, size_t /*charge*/,
+                      Cache::DeleterFn deleter) {
+    if (roles.find(deleter) == roles.end()) {
+      // Keep the stats collector
+      return;
+    }
+    keys.push_back(key.ToString());
+  };
+  cache->ApplyToAllEntries(callback, opts);
+  for (auto& k : keys) {
+    cache->Erase(k);
+  }
+}
+
+TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
+  const size_t capacity = size_t{1} << 25;
+  int iterations_tested = 0;
+  for (bool partition : {false, true}) {
+    for (std::shared_ptr<Cache> cache :
+         {NewLRUCache(capacity), NewClockCache(capacity)}) {
+      if (!cache) {
+        // Skip clock cache when not supported
+        continue;
+      }
+      ++iterations_tested;
+
+      Options options = CurrentOptions();
+      SetTimeElapseOnlySleepOnReopen(&options);
+      options.create_if_missing = true;
+      options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+      options.max_open_files = 13;
+      options.table_cache_numshardbits = 0;
+      // If this wakes up, it could interfere with test
+      options.stats_dump_period_sec = 0;
+
+      BlockBasedTableOptions table_options;
+      table_options.block_cache = cache;
+      table_options.cache_index_and_filter_blocks = true;
+      table_options.filter_policy.reset(NewBloomFilterPolicy(50));
+      if (partition) {
+        table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
+        table_options.partition_filters = true;
+      }
+      table_options.metadata_cache_options.top_level_index_pinning =
+          PinningTier::kNone;
+      table_options.metadata_cache_options.partition_pinning =
+          PinningTier::kNone;
+      table_options.metadata_cache_options.unpartitioned_pinning =
+          PinningTier::kNone;
+      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+      DestroyAndReopen(options);
+
+      // Create a new table.
+      ASSERT_OK(Put("foo", "value"));
+      ASSERT_OK(Put("bar", "value"));
+      ASSERT_OK(Flush());
+
+      ASSERT_OK(Put("zfoo", "value"));
+      ASSERT_OK(Put("zbar", "value"));
+      ASSERT_OK(Flush());
+
+      ASSERT_EQ(2, NumTableFilesAtLevel(0));
+
+      // Fresh cache
+      ClearCache(cache.get());
+
+      std::array<size_t, kNumCacheEntryRoles> expected{};
+      // For CacheEntryStatsCollector
+      expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
+      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
+
+      std::array<size_t, kNumCacheEntryRoles> prev_expected = expected;
+
+      // First access only filters
+      ASSERT_EQ("NOT_FOUND", Get("different from any key added"));
+      expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)] += 2;
+      if (partition) {
+        expected[static_cast<size_t>(CacheEntryRole::kFilterMetaBlock)] += 2;
+      }
+      // Within some time window, we will get cached entry stats
+      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
+      // Not enough to force a miss
+      env_->MockSleepForSeconds(45);
+      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
+      // Enough to force a miss
+      env_->MockSleepForSeconds(601);
+      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
+
+      // Now access index and data block
+      ASSERT_EQ("value", Get("foo"));
+      expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
+      if (partition) {
+        // top-level
+        expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
+      }
+      expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
+      // Enough to force a miss
+      env_->MockSleepForSeconds(601);
+      // But inject a simulated long scan so that we need a longer
+      // interval to force a miss next time.
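      // (Worked arithmetic, derived from the comments in this test rather
      // than stated by the patch: the injected callback below charges 20 s of
      // mock time per stats scan, and 20 s / 10000 s = 0.2%, so keeping scan
      // overhead at or below 0.2% stretches the recompute interval toward
      // ~10000 s. That is why a later 601 s sleep no longer forces a miss but
      // a 10000 s sleep does.)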
+ SyncPoint::GetInstance()->SetCallBack( + "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries", + [this](void*) { + // To spend no more than 0.2% of time scanning, we would need + // interval of at least 10000s + env_->MockSleepForSeconds(20); + }); + SyncPoint::GetInstance()->EnableProcessing(); + EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); + prev_expected = expected; + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + + // The same for other file + ASSERT_EQ("value", Get("zfoo")); + expected[static_cast(CacheEntryRole::kIndexBlock)]++; + if (partition) { + // top-level + expected[static_cast(CacheEntryRole::kIndexBlock)]++; + } + expected[static_cast(CacheEntryRole::kDataBlock)]++; + // Because of the simulated long scan, this is not enough to force + // a miss + env_->MockSleepForSeconds(601); + EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg()); + // But this is enough + env_->MockSleepForSeconds(10000); + EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); + prev_expected = expected; + + // Also check the GetProperty interface + std::map values; + ASSERT_TRUE( + db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values)); + + EXPECT_EQ( + ToString(expected[static_cast(CacheEntryRole::kIndexBlock)]), + values["count.index-block"]); + EXPECT_EQ( + ToString(expected[static_cast(CacheEntryRole::kDataBlock)]), + values["count.data-block"]); + EXPECT_EQ( + ToString(expected[static_cast(CacheEntryRole::kFilterBlock)]), + values["count.filter-block"]); + EXPECT_EQ( + ToString( + prev_expected[static_cast(CacheEntryRole::kWriteBuffer)]), + values["count.write-buffer"]); + EXPECT_EQ(ToString(expected[static_cast(CacheEntryRole::kMisc)]), + values["count.misc"]); + + // Add one for kWriteBuffer + { + WriteBufferManager wbm(size_t{1} << 20, cache); + wbm.ReserveMem(1024); + expected[static_cast(CacheEntryRole::kWriteBuffer)]++; + // Now we check that the GetProperty interface is more agressive about + // re-scanning stats, but not totally aggressive. + // Within some time window, we will get cached entry stats + env_->MockSleepForSeconds(1); + EXPECT_EQ(ToString(prev_expected[static_cast( + CacheEntryRole::kWriteBuffer)]), + values["count.write-buffer"]); + // Not enough for a "background" miss but enough for a "foreground" miss + env_->MockSleepForSeconds(45); + + ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, + &values)); + EXPECT_EQ( + ToString( + expected[static_cast(CacheEntryRole::kWriteBuffer)]), + values["count.write-buffer"]); + } + prev_expected = expected; + + // With collector pinned in cache, we should be able to hit + // even if the cache is full + ClearCache(cache.get()); + Cache::Handle* h = nullptr; + ASSERT_OK(cache->Insert("Fill-it-up", nullptr, capacity + 1, + GetNoopDeleterForRole(), + &h, Cache::Priority::HIGH)); + ASSERT_GT(cache->GetUsage(), cache->GetCapacity()); + expected = {}; + // For CacheEntryStatsCollector + expected[static_cast(CacheEntryRole::kMisc)] = 1; + // For Fill-it-up + expected[static_cast(CacheEntryRole::kMisc)]++; + // Still able to hit on saved stats + EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg()); + // Enough to force a miss + env_->MockSleepForSeconds(1000); + EXPECT_EQ(expected, GetCacheEntryRoleCountsBg()); + + cache->Release(h); + + // Now we test that the DB mutex is not held during scans, for the ways + // we know how to (possibly) trigger them. 
+      // Without a better way to check this, we simply inject an acquire &
+      // release of the DB mutex deep in the stat collection code. If we were
+      // already holding the mutex, that is UB that would at least be found by
+      // TSAN.
+      int scan_count = 0;
+      SyncPoint::GetInstance()->SetCallBack(
+          "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
+          [this, &scan_count](void*) {
+            dbfull()->TEST_LockMutex();
+            dbfull()->TEST_UnlockMutex();
+            ++scan_count;
+          });
+      SyncPoint::GetInstance()->EnableProcessing();
+
+      // Different things that might trigger a scan, with mock sleeps to
+      // force a miss.
+      env_->MockSleepForSeconds(10000);
+      dbfull()->DumpStats();
+      ASSERT_EQ(scan_count, 1);
+
+      env_->MockSleepForSeconds(10000);
+      ASSERT_TRUE(
+          db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
+      ASSERT_EQ(scan_count, 2);
+
+      env_->MockSleepForSeconds(10000);
+      std::string value_str;
+      ASSERT_TRUE(
+          db_->GetProperty(DB::Properties::kBlockCacheEntryStats, &value_str));
+      ASSERT_EQ(scan_count, 3);
+
+      env_->MockSleepForSeconds(10000);
+      ASSERT_TRUE(db_->GetProperty(DB::Properties::kCFStats, &value_str));
+      // To match historical speed, querying this property no longer triggers
+      // a scan, even if results are old. But periodic dump stats should keep
+      // things reasonably updated.
+      ASSERT_EQ(scan_count, /*unchanged*/ 3);
+
+      SyncPoint::GetInstance()->DisableProcessing();
+      SyncPoint::GetInstance()->ClearAllCallBacks();
+    }
+    EXPECT_GE(iterations_tested, 1);
+  }
+}
+
 #endif  // ROCKSDB_LITE
 
+class DBBlockCacheKeyTest
+    : public DBTestBase,
+      public testing::WithParamInterface<std::tuple<bool, bool>> {
+ public:
+  DBBlockCacheKeyTest()
+      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
+
+  void SetUp() override {
+    use_compressed_cache_ = std::get<0>(GetParam());
+    exclude_file_numbers_ = std::get<1>(GetParam());
+  }
+
+  bool use_compressed_cache_;
+  bool exclude_file_numbers_;
+};
+
+// Disable LinkFile so that we can physically copy a DB using Checkpoint.
+// Disable file GetUniqueId to enable stable cache keys.
+class StableCacheKeyTestFS : public FaultInjectionTestFS {
+ public:
+  explicit StableCacheKeyTestFS(const std::shared_ptr<FileSystem>& base)
+      : FaultInjectionTestFS(base) {
+    SetFailGetUniqueId(true);
+  }
+
+  virtual ~StableCacheKeyTestFS() override {}
+
+  IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
+                    IODebugContext*) override {
+    return IOStatus::NotSupported("Disabled");
+  }
+};
+
+TEST_P(DBBlockCacheKeyTest, StableCacheKeys) {
+  std::shared_ptr<StableCacheKeyTestFS> test_fs{
+      new StableCacheKeyTestFS(env_->GetFileSystem())};
+  std::unique_ptr<Env> test_env{
+      new CompositeEnvWrapper(env_, test_fs)};
+
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+  options.env = test_env.get();
+
+  BlockBasedTableOptions table_options;
+
+  int key_count = 0;
+  uint64_t expected_stat = 0;
+
+  std::function<void()> verify_stats;
+  if (use_compressed_cache_) {
+    if (!Snappy_Supported()) {
+      ROCKSDB_GTEST_SKIP("Compressed cache test requires snappy support");
+      return;
+    }
+    options.compression = CompressionType::kSnappyCompression;
+    table_options.no_block_cache = true;
+    table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
+    verify_stats = [&options, &expected_stat] {
+      // One for ordinary SST file and one for external SST file
+      ASSERT_EQ(expected_stat,
+                options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
+    };
+  } else {
+    table_options.cache_index_and_filter_blocks = true;
+    table_options.block_cache = NewLRUCache(1 << 25, 0, false);
+    verify_stats = [&options, &expected_stat] {
+      ASSERT_EQ(expected_stat,
+                options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
+      ASSERT_EQ(expected_stat,
+                options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
+      ASSERT_EQ(expected_stat,
+                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
+    };
+  }
+
+  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  CreateAndReopenWithCF({"koko"}, options);
+
+  if (exclude_file_numbers_) {
+    // Simulate something like old behavior without file numbers in properties.
+    // This is a "control" side of the test that also ensures safely degraded
+    // behavior on old files.
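    // (Explanatory note, not part of the upstream patch: with
    // orig_file_number forced to 0 below, readers cannot derive a stable
    // cache key, so each reopen or copy re-inserts blocks and perform_gets()
    // grows expected_stat by key_count on every round; with real file numbers
    // the same cache keys hit again and expected_stat stays at key_count.)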
+    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+        "BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey",
+        [&](void* arg) {
+          TableProperties* props = reinterpret_cast<TableProperties*>(arg);
+          props->orig_file_number = 0;
+        });
+    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+  }
+
+  std::function<void()> perform_gets = [&key_count, &expected_stat, this]() {
+    if (exclude_file_numbers_) {
+      // No cache key reuse should happen, because we can't rely on current
+      // file number being stable
+      expected_stat += key_count;
+    } else {
+      // Cache keys should be stable
+      expected_stat = key_count;
+    }
+    for (int i = 0; i < key_count; ++i) {
+      ASSERT_EQ(Get(1, Key(i)), "abc");
+    }
+  };
+
+  // Ordinary SST files with same session id
+  const std::string something_compressible(500U, 'x');
+  for (int i = 0; i < 2; ++i) {
+    ASSERT_OK(Put(1, Key(key_count), "abc"));
+    ASSERT_OK(Put(1, Key(key_count) + "a", something_compressible));
+    ASSERT_OK(Flush(1));
+    ++key_count;
+  }
+
+#ifndef ROCKSDB_LITE
+  // Save an export of those ordinary SST files for later
+  std::string export_files_dir = dbname_ + "/exported";
+  ExportImportFilesMetaData* metadata_ptr_ = nullptr;
+  Checkpoint* checkpoint;
+  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
+  ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir,
+                                           &metadata_ptr_));
+  ASSERT_NE(metadata_ptr_, nullptr);
+  delete checkpoint;
+  checkpoint = nullptr;
+
+  // External SST files with same session id
+  SstFileWriter sst_file_writer(EnvOptions(), options);
+  std::vector<std::string> external;
+  for (int i = 0; i < 2; ++i) {
+    std::string f = dbname_ + "/external" + ToString(i) + ".sst";
+    external.push_back(f);
+    ASSERT_OK(sst_file_writer.Open(f));
+    ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc"));
+    ASSERT_OK(
+        sst_file_writer.Put(Key(key_count) + "a", something_compressible));
+    ++key_count;
+    ExternalSstFileInfo external_info;
+    ASSERT_OK(sst_file_writer.Finish(&external_info));
+    IngestExternalFileOptions ingest_opts;
+    ASSERT_OK(db_->IngestExternalFile(handles_[1], {f}, ingest_opts));
+  }
+
+  if (exclude_file_numbers_) {
+    // FIXME(peterd): figure out where these extra ADDs are coming from
+    options.statistics->recordTick(BLOCK_CACHE_COMPRESSED_ADD,
+                                   uint64_t{0} - uint64_t{2});
+  }
+#endif
+
+  perform_gets();
+  verify_stats();
+
+  // Make sure we can cache hit after re-open
+  ReopenWithColumnFamilies({"default", "koko"}, options);
+
+  perform_gets();
+  verify_stats();
+
+  // Make sure we can cache hit even on a full copy of the DB. Using
+  // StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link.
+  // (Checkpoint not available in LITE mode to test this.)
+#ifndef ROCKSDB_LITE
+  auto db_copy_name = dbname_ + "-copy";
+  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
+  ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name));
+  delete checkpoint;
+
+  Close();
+  Destroy(options);
+
+  // Switch to the DB copy
+  SaveAndRestore save_dbname(&dbname_, db_copy_name);
+  ReopenWithColumnFamilies({"default", "koko"}, options);
+
+  perform_gets();
+  verify_stats();
+
+  // And ensure that re-importing + ingesting the same files into a
+  // different DB uses same cache keys
+  DestroyAndReopen(options);
+
+  ColumnFamilyHandle* cfh = nullptr;
+  ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
+                                              ImportColumnFamilyOptions(),
+                                              *metadata_ptr_, &cfh));
+  ASSERT_NE(cfh, nullptr);
+  delete cfh;
+  cfh = nullptr;
+  delete metadata_ptr_;
+  metadata_ptr_ = nullptr;
+
+  DestroyDB(export_files_dir, options);
+
+  ReopenWithColumnFamilies({"default", "yoyo"}, options);
+
+  IngestExternalFileOptions ingest_opts;
+  ASSERT_OK(db_->IngestExternalFile(handles_[1], {external}, ingest_opts));
+
+  perform_gets();
+  verify_stats();
+#endif  // !ROCKSDB_LITE
+
+  Close();
+  Destroy(options);
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
+}
+
+INSTANTIATE_TEST_CASE_P(DBBlockCacheKeyTest, DBBlockCacheKeyTest,
+                        ::testing::Combine(::testing::Bool(),
+                                           ::testing::Bool()));
+
+class DBBlockCachePinningTest
+    : public DBTestBase,
+      public testing::WithParamInterface<
+          std::tuple<bool, PinningTier, PinningTier, PinningTier>> {
+ public:
+  DBBlockCachePinningTest()
+      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
+
+  void SetUp() override {
+    partition_index_and_filters_ = std::get<0>(GetParam());
+    top_level_index_pinning_ = std::get<1>(GetParam());
+    partition_pinning_ = std::get<2>(GetParam());
+    unpartitioned_pinning_ = std::get<3>(GetParam());
+  }
+
+  bool partition_index_and_filters_;
+  PinningTier top_level_index_pinning_;
+  PinningTier partition_pinning_;
+  PinningTier unpartitioned_pinning_;
+};
+
+TEST_P(DBBlockCachePinningTest, TwoLevelDB) {
+  // Creates one file in L0 and one file in L1. Both files have enough data
+  // that their index and filter blocks are partitioned. The L1 file will also
+  // have a compression dictionary (those are trained only during compaction),
+  // which must be unpartitioned.
+  const int kKeySize = 32;
+  const int kBlockSize = 128;
+  const int kNumBlocksPerFile = 128;
+  const int kNumKeysPerFile = kBlockSize * kNumBlocksPerFile / kKeySize;
+
+  Options options = CurrentOptions();
+  // `kNoCompression` makes the unit test more portable. But it relies on the
+  // current behavior of persisting/accessing dictionary even when there's no
+  // (de)compression happening, which seems fairly likely to change over time.
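  // (Worked numbers, derived from the constants above rather than stated by
  // the patch: kNumKeysPerFile = 128 * 128 / 32 = 512, i.e. each file carries
  // roughly 128 data blocks of about 128 bytes each, enough for the index and
  // filter blocks to split into multiple partitions when partitioning is
  // enabled below.)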
+  options.compression = kNoCompression;
+  options.compression_opts.max_dict_bytes = 4 << 10;
+  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+  BlockBasedTableOptions table_options;
+  table_options.block_cache = NewLRUCache(1 << 20 /* capacity */);
+  table_options.block_size = kBlockSize;
+  table_options.metadata_block_size = kBlockSize;
+  table_options.cache_index_and_filter_blocks = true;
+  table_options.metadata_cache_options.top_level_index_pinning =
+      top_level_index_pinning_;
+  table_options.metadata_cache_options.partition_pinning = partition_pinning_;
+  table_options.metadata_cache_options.unpartitioned_pinning =
+      unpartitioned_pinning_;
+  table_options.filter_policy.reset(
+      NewBloomFilterPolicy(10 /* bits_per_key */));
+  if (partition_index_and_filters_) {
+    table_options.index_type =
+        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+    table_options.partition_filters = true;
+  }
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(options);
+
+  Random rnd(301);
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < kNumKeysPerFile; ++j) {
+      ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kKeySize)));
+    }
+    ASSERT_OK(Flush());
+    if (i == 0) {
+      // Prevent trivial move so file will be rewritten with dictionary and
+      // reopened with L1's pinning settings.
+      CompactRangeOptions cro;
+      cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
+      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
+    }
+  }
+
+  // Clear all unpinned blocks so unpinned blocks will show up as cache misses
+  // when reading a key from a file.
+  table_options.block_cache->EraseUnRefEntries();
+
+  // Get base cache values
+  uint64_t filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+  uint64_t index_misses = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
+  uint64_t compression_dict_misses =
+      TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
+
+  // Read a key from the L0 file
+  Get(Key(kNumKeysPerFile));
+  uint64_t expected_filter_misses = filter_misses;
+  uint64_t expected_index_misses = index_misses;
+  uint64_t expected_compression_dict_misses = compression_dict_misses;
+  if (partition_index_and_filters_) {
+    if (top_level_index_pinning_ == PinningTier::kNone) {
+      ++expected_filter_misses;
+      ++expected_index_misses;
+    }
+    if (partition_pinning_ == PinningTier::kNone) {
+      ++expected_filter_misses;
+      ++expected_index_misses;
+    }
+  } else {
+    if (unpartitioned_pinning_ == PinningTier::kNone) {
+      ++expected_filter_misses;
+      ++expected_index_misses;
+    }
+  }
+  if (unpartitioned_pinning_ == PinningTier::kNone) {
+    ++expected_compression_dict_misses;
+  }
+  ASSERT_EQ(expected_filter_misses,
+            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(expected_index_misses,
+            TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
+  ASSERT_EQ(expected_compression_dict_misses,
+            TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));
+
+  // Clear all unpinned blocks so unpinned blocks will show up as cache misses
+  // when reading a key from a file.
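  // (Explanatory note, not part of the upstream patch: pinned metadata blocks
  // are held referenced by the table reader, so EraseUnRefEntries() below
  // cannot evict them; only blocks left unpinned by the PinningTier settings
  // are erased and must be re-read, which is exactly what the expected miss
  // counters above and below account for.)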
+ table_options.block_cache->EraseUnRefEntries(); + + // Read a key from the L1 file + Get(Key(0)); + if (partition_index_and_filters_) { + if (top_level_index_pinning_ == PinningTier::kNone || + top_level_index_pinning_ == PinningTier::kFlushedAndSimilar) { + ++expected_filter_misses; + ++expected_index_misses; + } + if (partition_pinning_ == PinningTier::kNone || + partition_pinning_ == PinningTier::kFlushedAndSimilar) { + ++expected_filter_misses; + ++expected_index_misses; + } + } else { + if (unpartitioned_pinning_ == PinningTier::kNone || + unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) { + ++expected_filter_misses; + ++expected_index_misses; + } + } + if (unpartitioned_pinning_ == PinningTier::kNone || + unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) { + ++expected_compression_dict_misses; + } + ASSERT_EQ(expected_filter_misses, + TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); + ASSERT_EQ(expected_index_misses, + TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); + ASSERT_EQ(expected_compression_dict_misses, + TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS)); +} + +INSTANTIATE_TEST_CASE_P( + DBBlockCachePinningTest, DBBlockCachePinningTest, + ::testing::Combine( + ::testing::Bool(), + ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar, + PinningTier::kAll), + ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar, + PinningTier::kAll), + ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar, + PinningTier::kAll))); + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_bloom_filter_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_bloom_filter_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_bloom_filter_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_bloom_filter_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -7,10 +7,19 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
+#include <cmath>
+#include <deque>
+
+#include "cache/cache_entry_roles.h"
+#include "cache/cache_reservation_manager.h"
 #include "db/db_test_util.h"
+#include "options/options_helper.h"
 #include "port/stack_trace.h"
+#include "rocksdb/convenience.h"
 #include "rocksdb/perf_context.h"
 #include "table/block_based/filter_policy_internal.h"
+#include "test_util/testutil.h"
+#include "util/string_util.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -22,7 +31,8 @@
 
 class DBBloomFilterTest : public DBTestBase {
  public:
-  DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {}
+  DBBloomFilterTest()
+      : DBTestBase("db_bloom_filter_test", /*env_do_fsync=*/true) {}
 };
 
 class DBBloomFilterTestWithParam : public DBTestBase,
@@ -35,7 +45,8 @@
   uint32_t format_version_;
 
  public:
-  DBBloomFilterTestWithParam() : DBTestBase("/db_bloom_filter_tests") {}
+  DBBloomFilterTestWithParam()
+      : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {}
 
   ~DBBloomFilterTestWithParam() override {}
 
@@ -80,13 +91,16 @@
     options_override.partition_filters = partition_filters_;
     options_override.metadata_block_size = 32;
     Options options = CurrentOptions(options_override);
-    if (partition_filters_ &&
-        static_cast<BlockBasedTableOptions*>(
-            options.table_factory->GetOptions())
-            ->index_type != BlockBasedTableOptions::kTwoLevelIndexSearch) {
-      // In the current implementation partitioned filters depend on partitioned
-      // indexes
-      continue;
+    if (partition_filters_) {
+      auto* table_options =
+          options.table_factory->GetOptions<BlockBasedTableOptions>();
+      if (table_options != nullptr &&
+          table_options->index_type !=
+              BlockBasedTableOptions::kTwoLevelIndexSearch) {
+        // In the current implementation partitioned filters depend on
+        // partitioned indexes
+        continue;
+      }
     }
     options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
     CreateAndReopenWithCF({"pikachu"}, options);
@@ -122,8 +136,8 @@
       ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
 
       ASSERT_OK(Flush(1));
-      dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
-                                  true /* disallow trivial move */);
+      ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1],
+                                            true /* disallow trivial move */));
 
       numopen = TestGetTickerCount(options, NO_FILE_OPENS);
       cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
@@ -172,7 +186,7 @@
   ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
   ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
 
-  dbfull()->Flush(fo);
+  ASSERT_OK(dbfull()->Flush(fo));
 
   ASSERT_EQ("foo", Get("barbarbar"));
   ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
@@ -238,7 +252,7 @@
   ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2"));
   ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar"));
 
-  dbfull()->Flush(fo);
+  ASSERT_OK(dbfull()->Flush(fo));
 
   ASSERT_EQ("foo", Get("barbarbar"));
   ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
@@ -291,7 +305,7 @@
   // ranges.
   ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
   ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
-  dbfull()->Flush(fo);
+  ASSERT_OK(dbfull()->Flush(fo));
 
   Reopen(options);
   ASSERT_EQ("NOT_FOUND", Get("foo"));
@@ -322,7 +336,7 @@
   // ranges.
   ASSERT_OK(dbfull()->Put(wo, "aaa", ""));
   ASSERT_OK(dbfull()->Put(wo, "zzz", ""));
-  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
 
   // Reopen with both of whole key off and prefix extractor enabled.
   // Still no bloom filter should be used.
@@ -345,7 +359,7 @@
   // ranges.
ASSERT_OK(dbfull()->Put(wo, "aaa", "")); ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); options.prefix_extractor.reset(); bbto.whole_key_filtering = true; @@ -358,7 +372,7 @@ // not filtered out by key ranges. ASSERT_OK(dbfull()->Put(wo, "aaa", "")); ASSERT_OK(dbfull()->Put(wo, "zzz", "")); - Flush(); + ASSERT_OK(Flush()); // Now we have two files: // File 1: An older file with prefix bloom. @@ -461,7 +475,7 @@ for (int i = 0; i < N; i += 100) { ASSERT_OK(Put(1, Key(i), Key(i))); } - Flush(1); + ASSERT_OK(Flush(1)); // Prevent auto compactions triggered by seeks env_->delay_sstable_sync_.store(true, std::memory_order_release); @@ -497,36 +511,50 @@ ASSERT_LE(reads, 3 * N / 100); } +#ifndef ROCKSDB_LITE + // Sanity check some table properties + std::map props; + ASSERT_TRUE(db_->GetMapProperty( + handles_[1], DB::Properties::kAggregatedTableProperties, &props)); + uint64_t nkeys = N + N / 100; + uint64_t filter_size = ParseUint64(props["filter_size"]); + EXPECT_LE(filter_size, + (partition_filters_ ? 12 : 11) * nkeys / /*bits / byte*/ 8); + EXPECT_GE(filter_size, 10 * nkeys / /*bits / byte*/ 8); + + uint64_t num_filter_entries = ParseUint64(props["num_filter_entries"]); + EXPECT_EQ(num_filter_entries, nkeys); +#endif // ROCKSDB_LITE + env_->delay_sstable_sync_.store(false, std::memory_order_release); Close(); } while (ChangeCompactOptions()); } -#ifndef ROCKSDB_VALGRIND_RUN +#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestDefFormatVersion, ::testing::Values( std::make_tuple(BFP::kDeprecatedBlock, false, test::kDefaultFormatVersion), - std::make_tuple(BFP::kAuto, true, test::kDefaultFormatVersion), - std::make_tuple(BFP::kAuto, false, test::kDefaultFormatVersion))); + std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion), + std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion))); INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestWithParam, ::testing::Values( std::make_tuple(BFP::kDeprecatedBlock, false, test::kDefaultFormatVersion), - std::make_tuple(BFP::kAuto, true, test::kDefaultFormatVersion), - std::make_tuple(BFP::kAuto, false, test::kDefaultFormatVersion))); + std::make_tuple(BFP::kAutoBloom, true, test::kDefaultFormatVersion), + std::make_tuple(BFP::kAutoBloom, false, test::kDefaultFormatVersion))); INSTANTIATE_TEST_CASE_P( FormatLatest, DBBloomFilterTestWithParam, ::testing::Values( - std::make_tuple(BFP::kDeprecatedBlock, false, - test::kLatestFormatVersion), - std::make_tuple(BFP::kAuto, true, test::kLatestFormatVersion), - std::make_tuple(BFP::kAuto, false, test::kLatestFormatVersion))); -#endif // ROCKSDB_VALGRIND_RUN + std::make_tuple(BFP::kDeprecatedBlock, false, kLatestFormatVersion), + std::make_tuple(BFP::kAutoBloom, true, kLatestFormatVersion), + std::make_tuple(BFP::kAutoBloom, false, kLatestFormatVersion))); +#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) TEST_F(DBBloomFilterTest, BloomFilterRate) { while (ChangeFilterOptions()) { @@ -641,6 +669,439 @@ } } +/* + * A cache wrapper that tracks peaks and increments of filter + * construction cache reservation. 
+ *        p0
+ *       / \   p1
+ *      /   \  /\
+ *     /     \/  \
+ *  a /       b   \
+ * peaks = {p0, p1}
+ * increments = {p0-a, p1-b}
+ */
+class FilterConstructResPeakTrackingCache : public CacheWrapper {
+ public:
+  explicit FilterConstructResPeakTrackingCache(std::shared_ptr<Cache> target)
+      : CacheWrapper(std::move(target)),
+        cur_cache_res_(0),
+        cache_res_peak_(0),
+        cache_res_increment_(0),
+        last_peak_tracked_(false),
+        cache_res_increments_sum_(0) {}
+
+  using Cache::Insert;
+  Status Insert(const Slice& key, void* value, size_t charge,
+                void (*deleter)(const Slice& key, void* value),
+                Handle** handle = nullptr,
+                Priority priority = Priority::LOW) override {
+    Status s = target_->Insert(key, value, charge, deleter, handle, priority);
+    if (deleter == kNoopDeleterForFilterConstruction) {
+      if (last_peak_tracked_) {
+        cache_res_peak_ = 0;
+        cache_res_increment_ = 0;
+        last_peak_tracked_ = false;
+      }
+      cur_cache_res_ += charge;
+      cache_res_peak_ = std::max(cache_res_peak_, cur_cache_res_);
+      cache_res_increment_ += charge;
+    }
+    return s;
+  }
+
+  using Cache::Release;
+  bool Release(Handle* handle, bool force_erase = false) override {
+    auto deleter = GetDeleter(handle);
+    if (deleter == kNoopDeleterForFilterConstruction) {
+      if (!last_peak_tracked_) {
+        cache_res_peaks_.push_back(cache_res_peak_);
+        cache_res_increments_sum_ += cache_res_increment_;
+        last_peak_tracked_ = true;
+      }
+      cur_cache_res_ -= GetCharge(handle);
+    }
+    bool is_successful = target_->Release(handle, force_erase);
+    return is_successful;
+  }
+
+  std::deque<std::size_t> GetReservedCachePeaks() { return cache_res_peaks_; }
+
+  std::size_t GetReservedCacheIncrementSum() {
+    return cache_res_increments_sum_;
+  }
+
+ private:
+  static const Cache::DeleterFn kNoopDeleterForFilterConstruction;
+
+  std::size_t cur_cache_res_;
+  std::size_t cache_res_peak_;
+  std::size_t cache_res_increment_;
+  bool last_peak_tracked_;
+  std::deque<std::size_t> cache_res_peaks_;
+  std::size_t cache_res_increments_sum_;
+};
+
+const Cache::DeleterFn
+    FilterConstructResPeakTrackingCache::kNoopDeleterForFilterConstruction =
+        CacheReservationManager::TEST_GetNoopDeleterForRole<
+            CacheEntryRole::kFilterConstruction>();
+
+// To align with the type of hash entry being reserved in implementation.
+using FilterConstructionReserveMemoryHash = uint64_t;
+
+class DBFilterConstructionReserveMemoryTestWithParam
+    : public DBTestBase,
+      public testing::WithParamInterface<
+          std::tuple<bool, BloomFilterPolicy::Mode, bool>> {
+ public:
+  DBFilterConstructionReserveMemoryTestWithParam()
+      : DBTestBase("db_bloom_filter_tests",
+                   /*env_do_fsync=*/true),
+        num_key_(0),
+        reserve_table_builder_memory_(std::get<0>(GetParam())),
+        policy_(std::get<1>(GetParam())),
+        partition_filters_(std::get<2>(GetParam())) {
+    if (!reserve_table_builder_memory_ ||
+        policy_ == BloomFilterPolicy::Mode::kDeprecatedBlock ||
+        policy_ == BloomFilterPolicy::Mode::kLegacyBloom) {
+      // For these cases, we are only interested in whether filter construction
+      // cache reservation happens at all, not in its accuracy. Therefore we
+      // don't need many keys.
+      num_key_ = 5;
+    } else if (partition_filters_) {
+      // For the PartitionFilter case, since we set
+      // table_options.metadata_block_size big enough such that each partition
+      // triggers at least 1 dummy entry reservation each for hash entries and
+      // final filter, we need a large number of keys to ensure we have at
+      // least two partitions.
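      // (Worked example, assuming the 256 KiB dummy entry size that
      // CacheReservationManager::GetDummyEntrySize() returned at the time --
      // an assumption, not something this patch states: num_key_ =
      // 18 * 262144 / 8 = 589824 keys, i.e. 4.5 MiB of 8-byte hash entries,
      // enough to span multiple filter partitions below.)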
+      num_key_ = 18 * CacheReservationManager::GetDummyEntrySize() /
+                 sizeof(FilterConstructionReserveMemoryHash);
+    } else if (policy_ == BloomFilterPolicy::Mode::kFastLocalBloom) {
+      // For the Bloom Filter + FullFilter case, since we design num_key_ to
+      // make the hash entry cache reservation a multiple of dummy entries, the
+      // correct behavior of charging the final filter on top of it will
+      // trigger at least another dummy entry insertion. Therefore we can
+      // assert that behavior and we don't need a large number of keys to
+      // verify we indeed charge the final filter for cache reservation, even
+      // though the final filter is a lot smaller than the hash entries.
+      num_key_ = 1 * CacheReservationManager::GetDummyEntrySize() /
+                 sizeof(FilterConstructionReserveMemoryHash);
+    } else {
+      // For the Ribbon Filter + FullFilter case, we need a large enough number
+      // of keys so that charging the final filter after releasing the hash
+      // entries reservation will trigger at least another dummy entry (or
+      // equivalently, cause another peak in cache reservation) as the banding
+      // reservation might not be a multiple of dummy entry.
+      num_key_ = 12 * CacheReservationManager::GetDummyEntrySize() /
+                 sizeof(FilterConstructionReserveMemoryHash);
+    }
+  }
+
+  BlockBasedTableOptions GetBlockBasedTableOptions() {
+    BlockBasedTableOptions table_options;
+
+    // We set the cache capacity big enough to prevent the cache from filling
+    // up, for convenience in calculation.
+    constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024;
+
+    table_options.reserve_table_builder_memory = reserve_table_builder_memory_;
+    table_options.filter_policy.reset(new BloomFilterPolicy(10, policy_));
+    table_options.partition_filters = partition_filters_;
+    if (table_options.partition_filters) {
+      table_options.index_type =
+          BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+      // We set table_options.metadata_block_size big enough so that each
+      // partition triggers at least 1 dummy entry insertion each for hash
+      // entries and final filter.
+      table_options.metadata_block_size = 409000;
+    }
+
+    LRUCacheOptions lo;
+    lo.capacity = kCacheCapacity;
+    lo.num_shard_bits = 0;  // 2^0 shard
+    lo.strict_capacity_limit = true;
+    cache_ = std::make_shared<FilterConstructResPeakTrackingCache>(
+        (NewLRUCache(lo)));
+    table_options.block_cache = cache_;
+
+    return table_options;
+  }
+
+  std::size_t GetNumKey() { return num_key_; }
+
+  bool ReserveTableBuilderMemory() { return reserve_table_builder_memory_; }
+
+  BloomFilterPolicy::Mode GetFilterPolicy() { return policy_; }
+
+  bool PartitionFilters() { return partition_filters_; }
+
+  std::shared_ptr<FilterConstructResPeakTrackingCache>
+  GetFilterConstructResPeakTrackingCache() {
+    return cache_;
+  }
+
+ private:
+  std::size_t num_key_;
+  bool reserve_table_builder_memory_;
+  BloomFilterPolicy::Mode policy_;
+  bool partition_filters_;
+  std::shared_ptr<FilterConstructResPeakTrackingCache> cache_;
+};
+
+INSTANTIATE_TEST_CASE_P(
+    BlockBasedTableOptions, DBFilterConstructionReserveMemoryTestWithParam,
+    ::testing::Values(
+        std::make_tuple(false, BloomFilterPolicy::Mode::kFastLocalBloom, false),
+        std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, false),
+        std::make_tuple(true, BloomFilterPolicy::Mode::kFastLocalBloom, true),
+        std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
+                        false),
+        std::make_tuple(true, BloomFilterPolicy::Mode::kStandard128Ribbon,
+                        true),
+        std::make_tuple(true, BloomFilterPolicy::Mode::kDeprecatedBlock, false),
+        std::make_tuple(true, BloomFilterPolicy::Mode::kLegacyBloom, false)));
+
+// TODO: Speed up this test.
+// The current test inserts many keys (on the scale of dummy entry size)
+// in order to make the small memory users (e.g., final filter, partitioned
+// hash entries/filter/banding), which are proportional to the number of keys,
+// big enough that their cache reservation triggers dummy entry insertion and
+// becomes observable in the test.
+//
+// However, inserting that many keys slows down this test and leaves future
+// developers an opportunity to speed it up.
+//
+// Possible approaches & challenges:
+// 1. Use sync point during cache reservation of filter construction
+//
+// Benefit: It does not rely on triggering dummy entry insertion
+// but the sync point to verify small memory user is charged correctly.
+//
+// Challenge: this approach is intrusive.
+//
+// 2. Make dummy entry size configurable and set it small in the test
+//
+// Benefit: It increases the precision of cache reservation and therefore
+// small memory usage can still trigger insertion of dummy entry.
+//
+// Challenge: change CacheReservationManager related APIs and a hack
+// might be needed to control the size of dummy entry of
+// CacheReservationManager used in filter construction for testing
+// since CacheReservationManager is not exposed at the high level.
+//
+TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
+  Options options = CurrentOptions();
+  // We set write_buffer_size big enough so that in the case where there is
+  // filter construction cache reservation, flush won't be triggered before we
+  // manually trigger it for clean testing
+  options.write_buffer_size = 640 << 20;
+  options.table_factory.reset(
+      NewBlockBasedTableFactory(GetBlockBasedTableOptions()));
+  std::shared_ptr<FilterConstructResPeakTrackingCache> cache =
+      GetFilterConstructResPeakTrackingCache();
+  options.create_if_missing = true;
+  // Disable auto compaction to prevent its unexpected side effects on the
+  // number of keys per partition designed by us in the test
+  options.disable_auto_compactions = true;
+  DestroyAndReopen(options);
+  int num_key = static_cast<int>(GetNumKey());
+  for (int i = 0; i < num_key; i++) {
+    ASSERT_OK(Put(Key(i), Key(i)));
+  }
+
+  ASSERT_EQ(cache->GetReservedCacheIncrementSum(), 0)
+      << "Flush was triggered too early in the test case with filter "
+         "construction cache reservation - please make sure no flush triggered "
+         "during the key insertions above";
+
+  ASSERT_OK(Flush());
+
+  bool reserve_table_builder_memory = ReserveTableBuilderMemory();
+  BloomFilterPolicy::Mode policy = GetFilterPolicy();
+  bool partition_filters = PartitionFilters();
+
+  std::deque<std::size_t> filter_construction_cache_res_peaks =
+      cache->GetReservedCachePeaks();
+  std::size_t filter_construction_cache_res_increments_sum =
+      cache->GetReservedCacheIncrementSum();
+
+  if (!reserve_table_builder_memory) {
+    EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0);
+    return;
+  }
+
+  if (policy == BloomFilterPolicy::Mode::kDeprecatedBlock ||
+      policy == BloomFilterPolicy::Mode::kLegacyBloom) {
+    EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0)
+        << "There shouldn't be filter construction cache reservation as this "
+           "feature does not support BloomFilterPolicy::Mode::kDeprecatedBlock "
+           "nor BloomFilterPolicy::Mode::kLegacyBloom";
+    return;
+  }
+
+  const std::size_t kDummyEntrySize =
+      CacheReservationManager::GetDummyEntrySize();
+
+  const std::size_t predicted_hash_entries_cache_res =
+      num_key * sizeof(FilterConstructionReserveMemoryHash);
+  ASSERT_EQ(predicted_hash_entries_cache_res % kDummyEntrySize, 0)
+      << "It's by this test's design that predicted_hash_entries_cache_res is "
predicted_hash_entries_cache_res is " + "a multipe of dummy entry"; + + const std::size_t predicted_hash_entries_cache_res_dummy_entry_num = + predicted_hash_entries_cache_res / kDummyEntrySize; + const std::size_t predicted_final_filter_cache_res = + static_cast(std::ceil( + 1.0 * predicted_hash_entries_cache_res_dummy_entry_num / 6 * + (policy == BloomFilterPolicy::Mode::kStandard128Ribbon ? 0.7 : 1))) * + kDummyEntrySize; + const std::size_t predicted_banding_cache_res = + static_cast( + std::ceil(predicted_hash_entries_cache_res_dummy_entry_num * 2.5)) * + kDummyEntrySize; + + if (policy == BloomFilterPolicy::Mode::kFastLocalBloom) { + /* BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter + * p0 + * / \ + * b / \ + * / \ + * / \ + * 0/ \ + * hash entries = b - 0, final filter = p0 - b + * p0 = hash entries + final filter + * + * The test is designed in a way such that the reservation for b is a + * multiple of dummy entries so that reservation for (p0 - b) + * will trigger at least another dummy entry insertion. + * + * BloomFilterPolicy::Mode::kFastLocalBloom + PartitionedFilter + * p1 + * / \ + * p0 b'/ \ + * / \ / \ + * b / \ / \ + * / \ / \ + * / a \ + * 0/ \ + * partitioned hash entries1 = b - 0, partitioned hash entries1 = b' - a + * parittioned final filter1 = p0 - b, parittioned final filter2 = p1 - b' + * + * (increment p0 - 0) + (increment p1 - a) + * = partitioned hash entries1 + partitioned hash entries2 + * + parittioned final filter1 + parittioned final filter2 + * = hash entries + final filter + * + */ + if (!partition_filters) { + EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1) + << "Filter construction cache reservation should have only 1 peak in " + "case: BloomFilterPolicy::Mode::kFastLocalBloom + FullFilter"; + std::size_t filter_construction_cache_res_peak = + filter_construction_cache_res_peaks[0]; + EXPECT_GT(filter_construction_cache_res_peak, + predicted_hash_entries_cache_res) + << "The testing number of hash entries is designed to make hash " + "entries cache reservation be multiples of dummy entries" + " so the correct behavior of charging final filter on top of it" + " should've triggered at least another dummy entry insertion"; + + std::size_t predicted_filter_construction_cache_res_peak = + predicted_hash_entries_cache_res + predicted_final_filter_cache_res; + EXPECT_GE(filter_construction_cache_res_peak, + predicted_filter_construction_cache_res_peak * 0.9); + EXPECT_LE(filter_construction_cache_res_peak, + predicted_filter_construction_cache_res_peak * 1.1); + return; + } else { + EXPECT_GE(filter_construction_cache_res_peaks.size(), 2) + << "Filter construction cache reservation should have multiple peaks " + "in case: BloomFilterPolicy::Mode::kFastLocalBloom + " + "PartitionedFilter"; + std::size_t predicted_filter_construction_cache_res_increments_sum = + predicted_hash_entries_cache_res + predicted_final_filter_cache_res; + EXPECT_GE(filter_construction_cache_res_increments_sum, + predicted_filter_construction_cache_res_increments_sum * 0.9); + EXPECT_LE(filter_construction_cache_res_increments_sum, + predicted_filter_construction_cache_res_increments_sum * 1.1); + return; + } + } + + if (policy == BloomFilterPolicy::Mode::kStandard128Ribbon) { + /* BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter + * p0 + * / \ p1 + * / \/\ + * b / b' \ + * / \ + * 0/ \ + * hash entries = b - 0, banding = p0 - b, final filter = p1 - b' + * p0 = hash entries + banding + * + * The test is designed in a way such that the reservation for (p1 - b') + 
+     *  will trigger at least another dummy entry insertion
+     *  (or equivalently, create another peak).
+     *
+     * BloomFilterPolicy::Mode::kStandard128Ribbon + PartitionedFilter
+     *        p0                 p3
+     *       /  \  p1           /  \  p4
+     *      /    \/\       b'' /    \ /\
+     *   b /     b' \         /    a'  \
+     *    /          \       /          \
+     *  0/            \     /            \
+     *                 \   /
+     *                  \ /
+     *                   a
+     *  partitioned hash entries1 = b - 0, partitioned hash entries2 = b'' - a
+     *  partitioned banding1 = p0 - b, partitioned banding2 = p3 - b''
+     *  partitioned final filter1 = p1 - b', partitioned final filter2 = p4 - a'
+     *
+     *  (increment p0 - 0) + (increment p1 - b')
+     *  + (increment p3 - a) + (increment p4 - a')
+     *  = partitioned hash entries1 + partitioned hash entries2
+     *    + partitioned banding1 + partitioned banding2
+     *    + partitioned final filter1 + partitioned final filter2
+     *  = hash entries + banding + final filter
+     */
+    if (!partition_filters) {
+      ASSERT_GE(std::floor(1.0 * predicted_final_filter_cache_res /
+                           CacheReservationManager::GetDummyEntrySize()),
+                1)
+          << "Final filter cache reservation too small for this test - please "
+             "increase the number of keys";
+      EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2)
+          << "Filter construction cache reservation should have 2 peaks in "
+             "case: BloomFilterPolicy::Mode::kStandard128Ribbon + FullFilter. "
+             "The second peak results from charging the final filter after "
+             "decreasing the hash entry reservation since the testing final "
+             "filter reservation is designed to be at least 1 dummy entry size";
+
+      std::size_t filter_construction_cache_res_peak =
+          filter_construction_cache_res_peaks[0];
+      std::size_t predicted_filter_construction_cache_res_peak =
+          predicted_hash_entries_cache_res + predicted_banding_cache_res;
+      EXPECT_GE(filter_construction_cache_res_peak,
+                predicted_filter_construction_cache_res_peak * 0.9);
+      EXPECT_LE(filter_construction_cache_res_peak,
+                predicted_filter_construction_cache_res_peak * 1.1);
+      return;
+    } else {
+      EXPECT_GE(filter_construction_cache_res_peaks.size(), 3)
+          << "Filter construction cache reservation should have at least 3 "
+             "peaks in case: BloomFilterPolicy::Mode::kStandard128Ribbon + "
+             "PartitionedFilter";
+      std::size_t predicted_filter_construction_cache_res_increments_sum =
+          predicted_hash_entries_cache_res + predicted_banding_cache_res +
+          predicted_final_filter_cache_res;
+      EXPECT_GE(filter_construction_cache_res_increments_sum,
+                predicted_filter_construction_cache_res_increments_sum * 0.9);
+      EXPECT_LE(filter_construction_cache_res_increments_sum,
+                predicted_filter_construction_cache_res_increments_sum * 1.1);
+      return;
+    }
+  }
+}
+
 namespace {
 // A wrapped bloom over block-based FilterPolicy
 class TestingWrappedBlockBasedFilterPolicy : public FilterPolicy {
@@ -765,6 +1226,14 @@
   const std::unique_ptr<const FilterPolicy> policy_otherwise_;
 };
 
+static std::map<TableFileCreationReason, std::string>
+    table_file_creation_reason_to_string{
+        {TableFileCreationReason::kCompaction, "kCompaction"},
+        {TableFileCreationReason::kFlush, "kFlush"},
+        {TableFileCreationReason::kMisc, "kMisc"},
+        {TableFileCreationReason::kRecovery, "kRecovery"},
+    };
+
 class TestingContextCustomFilterPolicy
     : public LevelAndStyleCustomFilterPolicy {
  public:
@@ -777,11 +1246,17 @@
       const FilterBuildingContext& context) const override {
     test_report_ += "cf=";
     test_report_ += context.column_family_name;
-    test_report_ += ",cs=";
+    test_report_ += ",s=";
     test_report_ +=
         OptionsHelper::compaction_style_to_string[context.compaction_style];
-    test_report_ += ",lv=";
-    test_report_ += std::to_string(context.level_at_creation);
+    test_report_ += ",n=";
+    test_report_ +=
+        ToString(context.num_levels);
+    test_report_ += ",l=";
+    test_report_ += ToString(context.level_at_creation);
+    test_report_ += ",b=";
+    test_report_ += ToString(int{context.is_bottommost});
+    test_report_ += ",r=";
+    test_report_ += table_file_creation_reason_to_string[context.reason];
     test_report_ += "\n";
 
     return LevelAndStyleCustomFilterPolicy::GetBuilderWithContext(context);
@@ -799,18 +1274,21 @@
 }  // namespace
 
 TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) {
+  auto policy = std::make_shared<TestingContextCustomFilterPolicy>(15, 8, 5);
+  Options options;
   for (bool fifo : {true, false}) {
-    Options options = CurrentOptions();
+    options = CurrentOptions();
+    options.max_open_files = fifo ? -1 : options.max_open_files;
     options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
     options.compaction_style =
         fifo ? kCompactionStyleFIFO : kCompactionStyleLevel;
 
     BlockBasedTableOptions table_options;
-    auto policy = std::make_shared<TestingContextCustomFilterPolicy>(15, 8, 5);
     table_options.filter_policy = policy;
     table_options.format_version = 5;
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
 
+    TryReopen(options);
     CreateAndReopenWithCF({fifo ? "abe" : "bob"}, options);
 
     const int maxKey = 10000;
@@ -821,16 +1299,16 @@
     ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
     Flush(1);
     EXPECT_EQ(policy->DumpTestReport(),
-              fifo ? "cf=abe,cs=kCompactionStyleFIFO,lv=0\n"
-                   : "cf=bob,cs=kCompactionStyleLevel,lv=0\n");
+              fifo ? "cf=abe,s=kCompactionStyleFIFO,n=1,l=0,b=0,r=kFlush\n"
+                   : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n");
 
     for (int i = maxKey / 2; i < maxKey; i++) {
      ASSERT_OK(Put(1, Key(i), Key(i)));
    }
     Flush(1);
     EXPECT_EQ(policy->DumpTestReport(),
-              fifo ? "cf=abe,cs=kCompactionStyleFIFO,lv=0\n"
-                   : "cf=bob,cs=kCompactionStyleLevel,lv=0\n");
+              fifo ? "cf=abe,s=kCompactionStyleFIFO,n=1,l=0,b=0,r=kFlush\n"
+                   : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n");
 
     // Check that they can be found
     for (int i = 0; i < maxKey; i++) {
@@ -858,7 +1336,7 @@
       ASSERT_OK(db_->CompactRange(CompactRangeOptions(), handles_[1], nullptr,
                                   nullptr));
       EXPECT_EQ(policy->DumpTestReport(),
-                "cf=bob,cs=kCompactionStyleLevel,lv=1\n");
+                "cf=bob,s=kCompactionStyleLevel,n=7,l=1,b=1,r=kCompaction\n");
 
       // Check that we now have one filter, about 9.2% FP rate (5 bits per key)
       for (int i = 0; i < maxKey; i++) {
@@ -870,11 +1348,25 @@
       EXPECT_GE(useful_count, maxKey * 0.90);
       EXPECT_LE(useful_count, maxKey * 0.91);
     }
+    } else {
+#ifndef ROCKSDB_LITE
+      // Also try external SST file
+      {
+        std::string file_path = dbname_ + "/external.sst";
+        SstFileWriter sst_file_writer(EnvOptions(), options, handles_[1]);
+        ASSERT_OK(sst_file_writer.Open(file_path));
+        ASSERT_OK(sst_file_writer.Put("key", "value"));
+        ASSERT_OK(sst_file_writer.Finish());
+      }
+      // Note: kCompactionStyleLevel is default, ignored if num_levels == -1
+      EXPECT_EQ(policy->DumpTestReport(),
+                "cf=abe,s=kCompactionStyleLevel,n=-1,l=-1,b=0,r=kMisc\n");
+#endif
+    }
 
     // Destroy
     ASSERT_OK(dbfull()->DropColumnFamily(handles_[1]));
-    dbfull()->DestroyColumnFamilyHandle(handles_[1]);
+    ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1]));
     handles_[1] = nullptr;
   }
 }
@@ -1010,6 +1502,63 @@
   ASSERT_EQ(1, get_perf_context()->bloom_memtable_hit_count);
 }
 
+TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilterMultiGet) {
+  Options options = CurrentOptions();
+  options.memtable_prefix_bloom_size_ratio = 0.015;
+  options.memtable_whole_key_filtering = true;
+  Reopen(options);
+  std::string key1("AA");
+  std::string key2("BB");
+  std::string key3("CC");
+  std::string key4("DD");
+  std::string key_not("EE");
key_not("EE"); + std::string value1("Value1"); + std::string value2("Value2"); + std::string value3("Value3"); + std::string value4("Value4"); + + ASSERT_OK(Put(key1, value1, WriteOptions())); + ASSERT_OK(Put(key2, value2, WriteOptions())); + ASSERT_OK(Flush()); + ASSERT_OK(Put(key3, value3, WriteOptions())); + const Snapshot* snapshot = db_->GetSnapshot(); + ASSERT_OK(Put(key4, value4, WriteOptions())); + + // Delete key2 and key3 + ASSERT_OK( + db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "BA", "CZ")); + + // Read without snapshot + auto results = MultiGet({key_not, key1, key2, key3, key4}); + ASSERT_EQ(results[0], "NOT_FOUND"); + ASSERT_EQ(results[1], value1); + ASSERT_EQ(results[2], "NOT_FOUND"); + ASSERT_EQ(results[3], "NOT_FOUND"); + ASSERT_EQ(results[4], value4); + + // Also check Get + ASSERT_EQ(Get(key1), value1); + ASSERT_EQ(Get(key2), "NOT_FOUND"); + ASSERT_EQ(Get(key3), "NOT_FOUND"); + ASSERT_EQ(Get(key4), value4); + + // Read with snapshot + results = MultiGet({key_not, key1, key2, key3, key4}, snapshot); + ASSERT_EQ(results[0], "NOT_FOUND"); + ASSERT_EQ(results[1], value1); + ASSERT_EQ(results[2], value2); + ASSERT_EQ(results[3], value3); + ASSERT_EQ(results[4], "NOT_FOUND"); + + // Also check Get + ASSERT_EQ(Get(key1, snapshot), value1); + ASSERT_EQ(Get(key2, snapshot), value2); + ASSERT_EQ(Get(key3, snapshot), value3); + ASSERT_EQ(Get(key4, snapshot), "NOT_FOUND"); + + db_->ReleaseSnapshot(snapshot); +} + TEST_F(DBBloomFilterTest, MemtablePrefixBloomOutOfDomain) { constexpr size_t kPrefixSize = 8; const std::string kKey = "key"; @@ -1029,6 +1578,215 @@ ASSERT_EQ(kKey, iter->key()); } +class DBBloomFilterTestVaryPrefixAndFormatVer + : public DBTestBase, + public testing::WithParamInterface> { + protected: + bool use_prefix_; + uint32_t format_version_; + + public: + DBBloomFilterTestVaryPrefixAndFormatVer() + : DBTestBase("db_bloom_filter_tests", /*env_do_fsync=*/true) {} + + ~DBBloomFilterTestVaryPrefixAndFormatVer() override {} + + void SetUp() override { + use_prefix_ = std::get<0>(GetParam()); + format_version_ = std::get<1>(GetParam()); + } + + static std::string UKey(uint32_t i) { return Key(static_cast(i)); } +}; + +TEST_P(DBBloomFilterTestVaryPrefixAndFormatVer, PartitionedMultiGet) { + Options options = CurrentOptions(); + if (use_prefix_) { + // Entire key from UKey() + options.prefix_extractor.reset(NewCappedPrefixTransform(9)); + } + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(NewBloomFilterPolicy(20)); + bbto.partition_filters = true; + bbto.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + bbto.whole_key_filtering = !use_prefix_; + if (use_prefix_) { // (not related to prefix, just alternating between) + // Make sure code appropriately deals with metadata block size setting + // that is "too small" (smaller than minimum size for filter builder) + bbto.metadata_block_size = 63; + } else { + // Make sure the test will work even on platforms with large minimum + // filter size, due to large cache line size. + // (Largest cache line size + 10+% overhead.) 
+    bbto.metadata_block_size = 290;
+  }
+  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+  DestroyAndReopen(options);
+  ReadOptions ropts;
+
+  constexpr uint32_t N = 12000;
+  // Add N/2 evens
+  for (uint32_t i = 0; i < N; i += 2) {
+    ASSERT_OK(Put(UKey(i), UKey(i)));
+  }
+  ASSERT_OK(Flush());
+#ifndef ROCKSDB_LITE
+  ASSERT_EQ(TotalTableFiles(), 1);
+#endif
+
+  constexpr uint32_t Q = 29;
+  // MultiGet In
+  std::array<std::string, Q> keys;
+  std::array<Slice, Q> key_slices;
+  std::array<ColumnFamilyHandle*, Q> column_families;
+  // MultiGet Out
+  std::array<Status, Q> statuses;
+  std::array<PinnableSlice, Q> values;
+
+  TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
+  TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
+  TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL);
+  TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL);
+  TestGetAndResetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED);
+  TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE);
+  TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE);
+
+  // Check that initial clump of keys only loads one partition filter from
+  // block cache.
+  // And that spread out keys load many partition filters.
+  // In both cases, mix present vs. not present keys.
+  for (uint32_t stride : {uint32_t{1}, (N / Q) | 1}) {
+    for (uint32_t i = 0; i < Q; ++i) {
+      keys[i] = UKey(i * stride);
+      key_slices[i] = Slice(keys[i]);
+      column_families[i] = db_->DefaultColumnFamily();
+      statuses[i] = Status();
+      values[i] = PinnableSlice();
+    }
+
+    db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0],
+                  /*timestamps=*/nullptr, &statuses[0], true);
+
+    // Confirm correct status results
+    uint32_t number_not_found = 0;
+    for (uint32_t i = 0; i < Q; ++i) {
+      if ((i * stride % 2) == 0) {
+        ASSERT_OK(statuses[i]);
+      } else {
+        ASSERT_TRUE(statuses[i].IsNotFound());
+        ++number_not_found;
+      }
+    }
+
+    // Confirm correct Bloom stats (no FPs)
+    uint64_t filter_useful = TestGetAndResetTickerCount(
+        options,
+        use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL);
+    uint64_t filter_checked =
+        TestGetAndResetTickerCount(options, use_prefix_
+                                                ? BLOOM_FILTER_PREFIX_CHECKED
+                                                : BLOOM_FILTER_FULL_POSITIVE) +
+        (use_prefix_ ? 0 : filter_useful);
0 : filter_useful); + EXPECT_EQ(filter_useful, number_not_found); + EXPECT_EQ(filter_checked, Q); + if (!use_prefix_) { + EXPECT_EQ( + TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), + Q - number_not_found); + } + + // Confirm no duplicate loading same filter partition + uint64_t filter_accesses = + TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + + TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + if (stride == 1) { + EXPECT_EQ(filter_accesses, 1); + } else { + // for large stride + EXPECT_GE(filter_accesses, Q / 2 + 1); + } + } + + // Check that a clump of keys (present and not) works when spanning + // two partitions + int found_spanning = 0; + for (uint32_t start = 0; start < N / 2;) { + for (uint32_t i = 0; i < Q; ++i) { + keys[i] = UKey(start + i); + key_slices[i] = Slice(keys[i]); + column_families[i] = db_->DefaultColumnFamily(); + statuses[i] = Status(); + values[i] = PinnableSlice(); + } + + db_->MultiGet(ropts, Q, &column_families[0], &key_slices[0], &values[0], + /*timestamps=*/nullptr, &statuses[0], true); + + // Confirm correct status results + uint32_t number_not_found = 0; + for (uint32_t i = 0; i < Q; ++i) { + if (((start + i) % 2) == 0) { + ASSERT_OK(statuses[i]); + } else { + ASSERT_TRUE(statuses[i].IsNotFound()); + ++number_not_found; + } + } + + // Confirm correct Bloom stats (might see some FPs) + uint64_t filter_useful = TestGetAndResetTickerCount( + options, + use_prefix_ ? BLOOM_FILTER_PREFIX_USEFUL : BLOOM_FILTER_USEFUL); + uint64_t filter_checked = + TestGetAndResetTickerCount(options, use_prefix_ + ? BLOOM_FILTER_PREFIX_CHECKED + : BLOOM_FILTER_FULL_POSITIVE) + + (use_prefix_ ? 0 : filter_useful); + EXPECT_GE(filter_useful, number_not_found - 2); // possible FP + EXPECT_EQ(filter_checked, Q); + if (!use_prefix_) { + EXPECT_EQ( + TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), + Q - number_not_found); + } + + // Confirm no duplicate loading of same filter partition + uint64_t filter_accesses = + TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_HIT) + + TestGetAndResetTickerCount(options, BLOCK_CACHE_FILTER_MISS); + if (filter_accesses == 2) { + // Spanned across partitions. + ++found_spanning; + if (found_spanning >= 2) { + break; + } else { + // Ensure that at least once we have at least one present and + // one non-present key on both sides of partition boundary. 
+ start += 2; + } + } else { + EXPECT_EQ(filter_accesses, 1); + // See explanation at "start += 2" + start += Q - 4; + } + } + EXPECT_TRUE(found_spanning >= 2); +} + +INSTANTIATE_TEST_CASE_P(DBBloomFilterTestVaryPrefixAndFormatVer, + DBBloomFilterTestVaryPrefixAndFormatVer, + ::testing::Values( + // (use_prefix, format_version) + std::make_tuple(false, 2), + std::make_tuple(false, 3), + std::make_tuple(false, 4), + std::make_tuple(false, 5), + std::make_tuple(true, 2), + std::make_tuple(true, 3), + std::make_tuple(true, 4), + std::make_tuple(true, 5))); + #ifndef ROCKSDB_LITE namespace { namespace BFP2 { @@ -1229,9 +1987,9 @@ snprintf(buf, sizeof(buf), "%02d______:end", 10); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); - dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr, - nullptr); // move to level 1 + ASSERT_OK(dbtest->Flush()); + ASSERT_OK(dbtest->dbfull()->CompactRange(CompactRangeOptions(), nullptr, + nullptr)); // move to level 1 // GROUP 1 for (int i = 1; i <= small_range_sstfiles; i++) { @@ -1343,27 +2101,26 @@ for (int i = 0; i < numkeys; i += 2) { keys.push_back(i); } - std::random_shuffle(std::begin(keys), std::end(keys)); - + RandomShuffle(std::begin(keys), std::end(keys)); int num_inserted = 0; for (int key : keys) { ASSERT_OK(Put(1, Key(key), "val")); if (++num_inserted % 1000 == 0) { - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); } } ASSERT_OK(Put(1, Key(0), "val")); ASSERT_OK(Put(1, Key(numkeys), "val")); ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); if (NumTableFilesAtLevel(0, 1) == 0) { // No Level 0 file. Create one. 
ASSERT_OK(Put(1, Key(0), "val")); ASSERT_OK(Put(1, Key(numkeys), "val")); ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); } for (int i = 1; i < numkeys; i += 2) { @@ -1468,7 +2225,8 @@ BottommostLevelCompaction::kSkip; compact_options.change_level = true; compact_options.target_level = 7; - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); + ASSERT_TRUE(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr) + .IsNotSupported()); ASSERT_EQ(trivial_move, 1); ASSERT_EQ(non_trivial_move, 0); @@ -1500,10 +2258,10 @@ int CountIter(std::unique_ptr<Iterator>& iter, const Slice& key) { int count = 0; - for (iter->Seek(key); iter->Valid() && iter->status() == Status::OK(); - iter->Next()) { + for (iter->Seek(key); iter->Valid(); iter->Next()) { count++; } + EXPECT_OK(iter->status()); return count; } @@ -1516,6 +2274,7 @@ int using_full_builder = bfp_impl != BFP::kDeprecatedBlock; Options options; options.create_if_missing = true; + options.env = CurrentOptions().env; options.prefix_extractor.reset(NewCappedPrefixTransform(4)); options.disable_auto_compactions = true; options.statistics = CreateDBStatistics(); @@ -1532,7 +2291,7 @@ ASSERT_OK(Put("abcdxxx1", "val2")); ASSERT_OK(Put("abcdxxx2", "val3")); ASSERT_OK(Put("abcdxxx3", "val4")); - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); { // prefix_extractor has not changed, BF will always be read Slice upper_bound("abce"); @@ -1553,8 +2312,8 @@ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:5"}})); - ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(), - "rocksdb.FixedPrefix.5")); + ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), + "rocksdb.FixedPrefix.5"); { // BF changed, [abcdxx00, abce) is a valid bound, will trigger BF read Slice upper_bound("abce"); @@ -1646,6 +2405,7 @@ for (auto bfp_impl : BFP::kAllFixedImpls) { int using_full_builder = bfp_impl != BFP::kDeprecatedBlock; Options options; + options.env = CurrentOptions().env; options.create_if_missing = true; options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.disable_auto_compactions = true; @@ -1672,8 +2432,8 @@ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED), 1); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(), - "rocksdb.CappedPrefix.3")); + ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), + "rocksdb.CappedPrefix.3"); read_options.iterate_upper_bound = &upper_bound; std::unique_ptr<Iterator> iter(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter, "foo"), 2); @@ -1689,7 +2449,7 @@ ASSERT_OK(Put("foo4", "bar4")); ASSERT_OK(Put("foq5", "bar5")); ASSERT_OK(Put("fpb", "1")); - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); { // BF is capped:3 now std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options)); @@ -1706,14 +2466,14 @@ } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:2"}})); - ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(), - "rocksdb.FixedPrefix.2")); + ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), + "rocksdb.FixedPrefix.2"); // third SST with fixed:2 BF ASSERT_OK(Put("foo6", "bar6")); ASSERT_OK(Put("foo7", "bar7")); ASSERT_OK(Put("foq8", "bar8")); ASSERT_OK(Put("fpc", "2")); - dbfull()->Flush(FlushOptions()); +
ASSERT_OK(dbfull()->Flush(FlushOptions())); { // BF is fixed:2 now std::unique_ptr<Iterator> iter_tmp(db_->NewIterator(read_options)); @@ -1754,8 +2514,8 @@ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 3); } ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(), - "rocksdb.CappedPrefix.3")); + ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), + "rocksdb.CappedPrefix.3"); { std::unique_ptr<Iterator> iter_all(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_all, "foo"), 6); @@ -1795,9 +2555,8 @@ // create a new CF and set prefix_extractor dynamically options.prefix_extractor.reset(NewCappedPrefixTransform(3)); CreateColumnFamilies({"ramen_dojo_" + std::to_string(iteration)}, options); - ASSERT_EQ(0, - strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(), - "rocksdb.CappedPrefix.3")); + ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(), + "rocksdb.CappedPrefix.3"); ASSERT_OK(Put(2, "foo3", "bar3")); ASSERT_OK(Put(2, "foo4", "bar4")); ASSERT_OK(Put(2, "foo5", "bar5")); @@ -1813,9 +2572,8 @@ } ASSERT_OK( dbfull()->SetOptions(handles_[2], {{"prefix_extractor", "fixed:2"}})); - ASSERT_EQ(0, - strcmp(dbfull()->GetOptions(handles_[2]).prefix_extractor->Name(), - "rocksdb.FixedPrefix.2")); + ASSERT_EQ(dbfull()->GetOptions(handles_[2]).prefix_extractor->AsString(), + "rocksdb.FixedPrefix.2"); { std::unique_ptr<Iterator> iter( db_->NewIterator(read_options, handles_[2])); @@ -1824,10 +2582,10 @@ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); } ASSERT_OK(dbfull()->DropColumnFamily(handles_[2])); - dbfull()->DestroyColumnFamilyHandle(handles_[2]); + ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[2])); handles_[2] = nullptr; ASSERT_OK(dbfull()->DropColumnFamily(handles_[1])); - dbfull()->DestroyColumnFamilyHandle(handles_[1]); + ASSERT_OK(dbfull()->DestroyColumnFamilyHandle(handles_[1])); handles_[1] = nullptr; iteration++; } @@ -1838,6 +2596,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { for (auto bfp_impl : BFP::kAllFixedImpls) { Options options; + options.env = CurrentOptions().env; options.create_if_missing = true; options.prefix_extractor.reset(NewFixedPrefixTransform(1)); options.disable_auto_compactions = true; @@ -1879,8 +2638,8 @@ ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_PREFIX_USEFUL), 0); ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "capped:3"}})); - ASSERT_EQ(0, strcmp(dbfull()->GetOptions().prefix_extractor->Name(), - "rocksdb.CappedPrefix.3")); + ASSERT_EQ(dbfull()->GetOptions().prefix_extractor->AsString(), + "rocksdb.CappedPrefix.3"); { std::unique_ptr<Iterator> iter(db_->NewIterator(read_options)); // "fp*" should be skipped @@ -1899,6 +2658,55 @@ } } +TEST_F(DBBloomFilterTest, SeekForPrevWithPartitionedFilters) { + Options options = CurrentOptions(); + constexpr size_t kNumKeys = 10000; + static_assert(kNumKeys <= 10000, "kNumKeys has to be <= 10000"); + options.memtable_factory.reset( + test::NewSpecialSkipListFactory(kNumKeys + 10)); + options.create_if_missing = true; + constexpr size_t kPrefixLength = 4; + options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixLength)); + options.compression = kNoCompression; + BlockBasedTableOptions bbto; + bbto.filter_policy.reset(NewBloomFilterPolicy(50)); + bbto.index_shortening = + BlockBasedTableOptions::IndexShorteningMode::kNoShortening; + bbto.block_size = 128; + bbto.metadata_block_size = 128; + bbto.partition_filters = true; + bbto.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + DestroyAndReopen(options); + + const std::string value(64, '\0'); + + WriteOptions write_opts; + write_opts.disableWAL = true; + for (size_t i = 0; i < kNumKeys; ++i) { + std::ostringstream oss; + oss << std::setfill('0') << std::setw(4) << std::fixed << i; + ASSERT_OK(db_->Put(write_opts, oss.str(), value)); + } + ASSERT_OK(Flush()); + + ReadOptions read_opts; + // Use legacy, implicit prefix seek + read_opts.total_order_seek = false; + read_opts.auto_prefix_mode = false; + std::unique_ptr it(db_->NewIterator(read_opts)); + for (size_t i = 0; i < kNumKeys; ++i) { + // Seek with a key after each one added but with same prefix. One will + // surely cross a partition boundary. + std::ostringstream oss; + oss << std::setfill('0') << std::setw(4) << std::fixed << i << "a"; + it->SeekForPrev(oss.str()); + ASSERT_OK(it->status()); + ASSERT_TRUE(it->Valid()); + } + it.reset(); +} + #endif // ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_compaction_filter_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_compaction_filter_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_compaction_filter_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_compaction_filter_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -21,7 +21,8 @@ class DBTestCompactionFilter : public DBTestBase { public: - DBTestCompactionFilter() : DBTestBase("/db_compaction_filter_test") {} + DBTestCompactionFilter() + : DBTestBase("db_compaction_filter_test", /*env_do_fsync=*/true) {} }; // Param variant of DBTestBase::ChangeCompactOptions @@ -41,11 +42,11 @@ option_config_ == kUniversalSubcompactions) { assert(options.max_subcompactions > 1); } - TryReopen(options); + Reopen(options); } }; -#ifndef ROCKSDB_VALGRIND_RUN +#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) INSTANTIATE_TEST_CASE_P( CompactionFilterWithOption, DBTestCompactionFilterWithCompactParam, ::testing::Values(DBTestBase::OptionConfig::kDefault, @@ -54,11 +55,11 @@ DBTestBase::OptionConfig::kLevelSubcompactions, DBTestBase::OptionConfig::kUniversalSubcompactions)); #else -// Run fewer cases in valgrind +// Run fewer cases in non-full valgrind to save time. 
INSTANTIATE_TEST_CASE_P(CompactionFilterWithOption, DBTestCompactionFilterWithCompactParam, ::testing::Values(DBTestBase::OptionConfig::kDefault)); -#endif // ROCKSDB_VALGRIND_RUN +#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) class KeepFilter : public CompactionFilter { public: @@ -81,6 +82,11 @@ return true; } + bool FilterMergeOperand(int /*level*/, const Slice& /*key*/, + const Slice& /*operand*/) const override { + return true; + } + const char* Name() const override { return "DeleteFilter"; } }; @@ -126,22 +132,6 @@ const char* Name() const override { return "DeleteFilter"; } }; -class DelayFilter : public CompactionFilter { - public: - explicit DelayFilter(DBTestBase* d) : db_test(d) {} - bool Filter(int /*level*/, const Slice& /*key*/, const Slice& /*value*/, - std::string* /*new_value*/, - bool* /*value_changed*/) const override { - db_test->env_->addon_time_.fetch_add(1000); - return true; - } - - const char* Name() const override { return "DelayFilter"; } - - private: - DBTestBase* db_test; -}; - class ConditionalFilter : public CompactionFilter { public: explicit ConditionalFilter(const std::string* filtered_value) @@ -205,18 +195,36 @@ bool compaction_filter_created_; }; +// This filter factory is configured with a `TableFileCreationReason`. Only +// table files created for that reason will undergo filtering. This +// configurability makes it useful to tests for filtering non-compaction table +// files, such as "CompactionFilterFlush" and "CompactionFilterRecovery". class DeleteFilterFactory : public CompactionFilterFactory { public: + explicit DeleteFilterFactory(TableFileCreationReason reason) + : reason_(reason) {} + std::unique_ptr CreateCompactionFilter( const CompactionFilter::Context& context) override { - if (context.is_manual_compaction) { - return std::unique_ptr(new DeleteFilter()); - } else { + EXPECT_EQ(reason_, context.reason); + if (context.reason == TableFileCreationReason::kCompaction && + !context.is_manual_compaction) { + // Table files created by automatic compaction do not undergo filtering. + // Presumably some tests rely on this. return std::unique_ptr(nullptr); } + return std::unique_ptr(new DeleteFilter()); + } + + bool ShouldFilterTableFileCreation( + TableFileCreationReason reason) const override { + return reason_ == reason; } const char* Name() const override { return "DeleteFilterFactory"; } + + private: + const TableFileCreationReason reason_; }; // Delete Filter Factory which ignores snapshots @@ -248,20 +256,6 @@ const char* Name() const override { return "SkipEvenFilterFactory"; } }; -class DelayFilterFactory : public CompactionFilterFactory { - public: - explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {} - std::unique_ptr CreateCompactionFilter( - const CompactionFilter::Context& /*context*/) override { - return std::unique_ptr(new DelayFilter(db_test)); - } - - const char* Name() const override { return "DelayFilterFactory"; } - - private: - DBTestBase* db_test; -}; - class ConditionalFilterFactory : public CompactionFilterFactory { public: explicit ConditionalFilterFactory(const Slice& filtered_value) @@ -305,7 +299,7 @@ for (int i = 0; i < 100000; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); - Put(1, key, value); + ASSERT_OK(Put(1, key, value)); } ASSERT_OK(Flush(1)); @@ -313,10 +307,10 @@ // the compaction is each level invokes the filter for // all the keys in that level. 
cfilter_count = 0; - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); ASSERT_EQ(cfilter_count, 100000); cfilter_count = 0; - dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); ASSERT_EQ(cfilter_count, 100000); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); @@ -336,19 +330,21 @@ InternalKeyComparator icmp(options.comparator); ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* upper_bound */); + ReadOptions read_options; ScopedArenaIterator iter(dbfull()->NewInternalIterator( - &arena, &range_del_agg, kMaxSequenceNumber, handles_[1])); + read_options, &arena, &range_del_agg, kMaxSequenceNumber, handles_[1])); iter->SeekToFirst(); ASSERT_OK(iter->status()); while (iter->Valid()) { ParsedInternalKey ikey(Slice(), 0, kTypeValue); - ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true); + ASSERT_OK(ParseInternalKey(iter->key(), &ikey, true /* log_err_key */)); total++; if (ikey.sequence != 0) { count++; } iter->Next(); } + ASSERT_OK(iter->status()); } ASSERT_EQ(total, 100000); ASSERT_EQ(count, 0); @@ -365,10 +361,10 @@ // means that all keys should pass at least once // via the compaction filter cfilter_count = 0; - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); ASSERT_EQ(cfilter_count, 100000); cfilter_count = 0; - dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); ASSERT_EQ(cfilter_count, 100000); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); @@ -376,7 +372,8 @@ // create a new database with the compaction // filter in such a way that it deletes all keys - options.compaction_filter_factory = std::make_shared(); + options.compaction_filter_factory = std::make_shared( + TableFileCreationReason::kCompaction); options.create_if_missing = true; DestroyAndReopen(options); CreateAndReopenWithCF({"pikachu"}, options); @@ -397,10 +394,10 @@ // verify that at the end of the compaction process, // nothing is left. 
cfilter_count = 0; - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); ASSERT_EQ(cfilter_count, 100000); cfilter_count = 0; - dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); ASSERT_EQ(cfilter_count, 0); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0); @@ -415,6 +412,7 @@ count++; iter->Next(); } + ASSERT_OK(iter->status()); ASSERT_EQ(count, 0); } @@ -426,13 +424,14 @@ InternalKeyComparator icmp(options.comparator); ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* upper_bound */); + ReadOptions read_options; ScopedArenaIterator iter(dbfull()->NewInternalIterator( - &arena, &range_del_agg, kMaxSequenceNumber, handles_[1])); + read_options, &arena, &range_del_agg, kMaxSequenceNumber, handles_[1])); iter->SeekToFirst(); ASSERT_OK(iter->status()); while (iter->Valid()) { ParsedInternalKey ikey(Slice(), 0, kTypeValue); - ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true); + ASSERT_OK(ParseInternalKey(iter->key(), &ikey, true /* log_err_key */)); ASSERT_NE(ikey.sequence, (unsigned)0); count++; iter->Next(); @@ -446,7 +445,8 @@ // entries in VersionEdit, but none of the 'AddFile's. TEST_F(DBTestCompactionFilter, CompactionFilterDeletesAll) { Options options = CurrentOptions(); - options.compaction_filter_factory = std::make_shared(); + options.compaction_filter_factory = std::make_shared( + TableFileCreationReason::kCompaction); options.disable_auto_compactions = true; options.create_if_missing = true; DestroyAndReopen(options); @@ -454,9 +454,9 @@ // put some data for (int table = 0; table < 4; ++table) { for (int i = 0; i < 10 + table; ++i) { - Put(ToString(table * 100 + i), "val"); + ASSERT_OK(Put(ToString(table * 100 + i), "val")); } - Flush(); + ASSERT_OK(Flush()); } // this will produce empty file (delete compaction filter) @@ -467,6 +467,7 @@ Iterator* itr = db_->NewIterator(ReadOptions()); itr->SeekToFirst(); + ASSERT_OK(itr->status()); // empty db ASSERT_TRUE(!itr->Valid()); @@ -474,6 +475,64 @@ } #endif // ROCKSDB_LITE +TEST_F(DBTestCompactionFilter, CompactionFilterFlush) { + // Tests a `CompactionFilterFactory` that filters when table file is created + // by flush. + Options options = CurrentOptions(); + options.compaction_filter_factory = + std::make_shared(TableFileCreationReason::kFlush); + options.merge_operator = MergeOperators::CreateStringAppendOperator(); + Reopen(options); + + // Puts and Merges are purged in flush. + ASSERT_OK(Put("a", "v")); + ASSERT_OK(Merge("b", "v")); + ASSERT_OK(Flush()); + ASSERT_EQ("NOT_FOUND", Get("a")); + ASSERT_EQ("NOT_FOUND", Get("b")); + + // However, Puts and Merges are preserved by recovery. + ASSERT_OK(Put("a", "v")); + ASSERT_OK(Merge("b", "v")); + Reopen(options); + ASSERT_EQ("v", Get("a")); + ASSERT_EQ("v", Get("b")); + + // Likewise, compaction does not apply filtering. + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("v", Get("a")); + ASSERT_EQ("v", Get("b")); +} + +TEST_F(DBTestCompactionFilter, CompactionFilterRecovery) { + // Tests a `CompactionFilterFactory` that filters when table file is created + // by recovery. 
+ Options options = CurrentOptions(); + options.compaction_filter_factory = + std::make_shared(TableFileCreationReason::kRecovery); + options.merge_operator = MergeOperators::CreateStringAppendOperator(); + Reopen(options); + + // Puts and Merges are purged in recovery. + ASSERT_OK(Put("a", "v")); + ASSERT_OK(Merge("b", "v")); + Reopen(options); + ASSERT_EQ("NOT_FOUND", Get("a")); + ASSERT_EQ("NOT_FOUND", Get("b")); + + // However, Puts and Merges are preserved by flush. + ASSERT_OK(Put("a", "v")); + ASSERT_OK(Merge("b", "v")); + ASSERT_OK(Flush()); + ASSERT_EQ("v", Get("a")); + ASSERT_EQ("v", Get("b")); + + // Likewise, compaction does not apply filtering. + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ("v", Get("a")); + ASSERT_EQ("v", Get("b")); +} + TEST_P(DBTestCompactionFilterWithCompactParam, CompactionFilterWithValueChange) { Options options = CurrentOptions(); @@ -490,25 +549,25 @@ for (int i = 0; i < 100001; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); - Put(1, key, value); + ASSERT_OK(Put(1, key, value)); } // push all files to lower levels ASSERT_OK(Flush(1)); if (option_config_ != kUniversalCompactionMultiLevel && option_config_ != kUniversalSubcompactions) { - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); - dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); + ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); } else { - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); } // re-write all data again for (int i = 0; i < 100001; i++) { char key[100]; snprintf(key, sizeof(key), "B%010d", i); - Put(1, key, value); + ASSERT_OK(Put(1, key, value)); } // push all files to lower levels. This should @@ -516,11 +575,11 @@ ASSERT_OK(Flush(1)); if (option_config_ != kUniversalCompactionMultiLevel && option_config_ != kUniversalSubcompactions) { - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]); - dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1])); + ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1])); } else { - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, - nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], + nullptr, nullptr)); } // verify that all keys now have the new value that @@ -558,7 +617,7 @@ ASSERT_OK(Flush()); std::string newvalue = Get("foo"); ASSERT_EQ(newvalue, three); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); newvalue = Get("foo"); ASSERT_EQ(newvalue, three); @@ -566,12 +625,12 @@ // merge keys. 
ASSERT_OK(db_->Put(WriteOptions(), "bar", two)); ASSERT_OK(Flush()); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); newvalue = Get("bar"); ASSERT_EQ("NOT_FOUND", newvalue); ASSERT_OK(db_->Merge(WriteOptions(), "bar", two)); ASSERT_OK(Flush()); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); newvalue = Get("bar"); ASSERT_EQ(two, two); @@ -582,7 +641,7 @@ ASSERT_OK(Flush()); newvalue = Get("foobar"); ASSERT_EQ(newvalue, three); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); newvalue = Get("foobar"); ASSERT_EQ(newvalue, three); @@ -595,7 +654,7 @@ ASSERT_OK(Flush()); newvalue = Get("barfoo"); ASSERT_EQ(newvalue, four); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); newvalue = Get("barfoo"); ASSERT_EQ(newvalue, four); } @@ -617,21 +676,21 @@ for (int i = 0; i < num_keys_per_file; i++) { char key[100]; snprintf(key, sizeof(key), "B%08d%02d", i, j); - Put(key, value); + ASSERT_OK(Put(key, value)); } - dbfull()->TEST_FlushMemTable(); + ASSERT_OK(dbfull()->TEST_FlushMemTable()); // Make sure next file is much smaller so automatic compaction will not // be triggered. num_keys_per_file /= 2; } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Force a manual compaction cfilter_count = 0; filter->expect_manual_compaction_.store(true); filter->expect_full_compaction_.store(true); filter->expect_cf_id_.store(0); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(cfilter_count, 700); ASSERT_EQ(NumSortedRuns(0), 1); ASSERT_TRUE(filter->compaction_filter_created()); @@ -644,13 +703,14 @@ InternalKeyComparator icmp(options.comparator); ReadRangeDelAggregator range_del_agg(&icmp, kMaxSequenceNumber /* snapshots */); + ReadOptions read_options; ScopedArenaIterator iter(dbfull()->NewInternalIterator( - &arena, &range_del_agg, kMaxSequenceNumber)); + read_options, &arena, &range_del_agg, kMaxSequenceNumber)); iter->SeekToFirst(); ASSERT_OK(iter->status()); while (iter->Valid()) { ParsedInternalKey ikey(Slice(), 0, kTypeValue); - ASSERT_EQ(ParseInternalKey(iter->key(), &ikey), true); + ASSERT_OK(ParseInternalKey(iter->key(), &ikey, true /* log_err_key */)); total++; if (ikey.sequence != 0) { count++; @@ -680,14 +740,14 @@ for (int i = 0; i < num_keys_per_file; i++) { char key[100]; snprintf(key, sizeof(key), "B%08d%02d", i, j); - Put(1, key, value); + ASSERT_OK(Put(1, key, value)); } - Flush(1); + ASSERT_OK(Flush(1)); // Make sure next file is much smaller so automatic compaction will not // be triggered. 
num_keys_per_file /= 2; } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_TRUE(filter->compaction_filter_created()); } @@ -706,9 +766,9 @@ const Snapshot* snapshot = nullptr; for (int table = 0; table < 4; ++table) { for (int i = 0; i < 10; ++i) { - Put(ToString(table * 100 + i), "val"); + ASSERT_OK(Put(ToString(table * 100 + i), "val")); } - Flush(); + ASSERT_OK(Flush()); if (table == 0) { snapshot = db_->GetSnapshot(); @@ -728,6 +788,7 @@ read_options.snapshot = snapshot; std::unique_ptr<Iterator> iter(db_->NewIterator(read_options)); iter->SeekToFirst(); + ASSERT_OK(iter->status()); int count = 0; while (iter->Valid()) { count++; @@ -736,6 +797,7 @@ ASSERT_EQ(count, 6); read_options.snapshot = nullptr; std::unique_ptr<Iterator> iter1(db_->NewIterator(read_options)); + ASSERT_OK(iter1->status()); iter1->SeekToFirst(); count = 0; while (iter1->Valid()) { @@ -766,9 +828,9 @@ for (int i = table * 6; i < 39 + table * 11; ++i) { char key[100]; snprintf(key, sizeof(key), "%010d", table * 100 + i); - Put(key, std::to_string(table * 1000 + i)); + ASSERT_OK(Put(key, std::to_string(table * 1000 + i))); } - Flush(); + ASSERT_OK(Flush()); } cfilter_skips = 0; @@ -807,10 +869,10 @@ options.create_if_missing = true; DestroyAndReopen(options); - Put("0000000010", "v10"); - Put("0000000020", "v20"); // skipped - Put("0000000050", "v50"); - Flush(); + ASSERT_OK(Put("0000000010", "v10")); + ASSERT_OK(Put("0000000020", "v20")); // skipped + ASSERT_OK(Put("0000000050", "v50")); + ASSERT_OK(Flush()); cfilter_skips = 0; EXPECT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); @@ -848,13 +910,13 @@ options.compaction_filter = new TestNotSupportedFilter(); DestroyAndReopen(options); - Put("a", "v10"); - Put("z", "v20"); - Flush(); - - Put("a", "v10"); - Put("z", "v20"); - Flush(); + ASSERT_OK(Put("a", "v10")); + ASSERT_OK(Put("z", "v20")); + ASSERT_OK(Flush()); + + ASSERT_OK(Put("a", "v10")); + ASSERT_OK(Put("z", "v20")); + ASSERT_OK(Flush()); // Compaction should fail because IgnoreSnapshots() = false EXPECT_TRUE(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr) @@ -863,6 +925,49 @@ delete options.compaction_filter; } +class TestNotSupportedFilterFactory : public CompactionFilterFactory { + public: + explicit TestNotSupportedFilterFactory(TableFileCreationReason reason) + : reason_(reason) {} + + bool ShouldFilterTableFileCreation( + TableFileCreationReason reason) const override { + return reason_ == reason; + } + + std::unique_ptr<CompactionFilter> CreateCompactionFilter( + const CompactionFilter::Context& /* context */) override { + return std::unique_ptr<CompactionFilter>(new TestNotSupportedFilter()); + } + + const char* Name() const override { return "TestNotSupportedFilterFactory"; } + + private: + const TableFileCreationReason reason_; +}; + +TEST_F(DBTestCompactionFilter, IgnoreSnapshotsFalseDuringFlush) { + Options options = CurrentOptions(); + options.compaction_filter_factory = + std::make_shared<TestNotSupportedFilterFactory>( + TableFileCreationReason::kFlush); + Reopen(options); + + ASSERT_OK(Put("a", "v10")); + ASSERT_TRUE(Flush().IsNotSupported()); +} + +TEST_F(DBTestCompactionFilter, IgnoreSnapshotsFalseRecovery) { + Options options = CurrentOptions(); + options.compaction_filter_factory = + std::make_shared<TestNotSupportedFilterFactory>( + TableFileCreationReason::kRecovery); + Reopen(options); + + ASSERT_OK(Put("a", "v10")); + ASSERT_TRUE(TryReopen(options).IsNotSupported()); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_compaction_test.cc
mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_compaction_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_compaction_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_compaction_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -7,16 +7,23 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include + +#include "db/blob/blob_index.h" #include "db/db_test_util.h" +#include "env/mock_env.h" #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/concurrent_task_limiter.h" #include "rocksdb/experimental.h" #include "rocksdb/sst_file_writer.h" #include "rocksdb/utilities/convenience.h" -#include "test_util/fault_injection_test_env.h" #include "test_util/sync_point.h" +#include "test_util/testutil.h" #include "util/concurrent_task_limiter_impl.h" +#include "util/random.h" +#include "utilities/fault_injection_env.h" +#include "utilities/fault_injection_fs.h" namespace ROCKSDB_NAMESPACE { @@ -25,14 +32,16 @@ class DBCompactionTest : public DBTestBase { public: - DBCompactionTest() : DBTestBase("/db_compaction_test") {} + DBCompactionTest() + : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {} }; class DBCompactionTestWithParam : public DBTestBase, public testing::WithParamInterface> { public: - DBCompactionTestWithParam() : DBTestBase("/db_compaction_test") { + DBCompactionTestWithParam() + : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { max_subcompactions_ = std::get<0>(GetParam()); exclusive_manual_compaction_ = std::get<1>(GetParam()); } @@ -45,12 +54,34 @@ bool exclusive_manual_compaction_; }; +class DBCompactionTestWithBottommostParam + : public DBTestBase, + public testing::WithParamInterface { + public: + DBCompactionTestWithBottommostParam() + : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { + bottommost_level_compaction_ = GetParam(); + } + + BottommostLevelCompaction bottommost_level_compaction_; +}; + class DBCompactionDirectIOTest : public DBCompactionTest, public ::testing::WithParamInterface { public: DBCompactionDirectIOTest() : DBCompactionTest() {} }; +// Param = true : target level is non-empty +// Param = false: level between target level and source level +// is not empty. 
+class ChangeLevelConflictsWithAuto + : public DBCompactionTest, + public ::testing::WithParamInterface { + public: + ChangeLevelConflictsWithAuto() : DBCompactionTest() {} +}; + namespace { class FlushedFileCollector : public EventListener { @@ -151,27 +182,28 @@ options.target_file_size_base * options.target_file_size_multiplier; options.max_bytes_for_level_multiplier = 2; options.disable_auto_compactions = false; + options.compaction_options_universal.max_size_amplification_percent = 100; return options; } bool HaveOverlappingKeyRanges( const Comparator* c, const SstFileMetaData& a, const SstFileMetaData& b) { - if (c->Compare(a.smallestkey, b.smallestkey) >= 0) { - if (c->Compare(a.smallestkey, b.largestkey) <= 0) { + if (c->CompareWithoutTimestamp(a.smallestkey, b.smallestkey) >= 0) { + if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) { // b.smallestkey <= a.smallestkey <= b.largestkey return true; } - } else if (c->Compare(a.largestkey, b.smallestkey) >= 0) { + } else if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) { // a.smallestkey < b.smallestkey <= a.largestkey return true; } - if (c->Compare(a.largestkey, b.largestkey) <= 0) { - if (c->Compare(a.largestkey, b.smallestkey) >= 0) { + if (c->CompareWithoutTimestamp(a.largestkey, b.largestkey) <= 0) { + if (c->CompareWithoutTimestamp(a.largestkey, b.smallestkey) >= 0) { // b.smallestkey <= a.largestkey <= b.largestkey return true; } - } else if (c->Compare(a.smallestkey, b.largestkey) <= 0) { + } else if (c->CompareWithoutTimestamp(a.smallestkey, b.largestkey) <= 0) { // a.smallestkey <= b.largestkey < a.largestkey return true; } @@ -226,7 +258,7 @@ const CompactionStatsCollector& collector) { #ifndef NDEBUG InternalStats* internal_stats_ptr = cfd.internal_stats(); - ASSERT_TRUE(internal_stats_ptr != nullptr); + ASSERT_NE(internal_stats_ptr, nullptr); const std::vector& comp_stats = internal_stats_ptr->TEST_GetCompactionStats(); const int num_of_reasons = static_cast(CompactionReason::kNumOfReasons); @@ -270,7 +302,7 @@ } } // anonymous namespace -#ifndef ROCKSDB_VALGRIND_RUN +#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) // All the TEST_P tests run once with sub_compactions disabled (i.e. // options.max_subcompactions = 1) and once with it enabled TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) { @@ -295,25 +327,47 @@ const int kTestSize = kCDTKeysPerBuffer * 1024; std::vector values; for (int k = 0; k < kTestSize; ++k) { - values.push_back(RandomString(&rnd, kCDTValueSize)); + values.push_back(rnd.RandomString(kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - db_size[0] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0])); for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - db_size[1] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1])); - // must have much smaller db size. - ASSERT_GT(db_size[0] / 3, db_size[1]); + if (options.compaction_style == kCompactionStyleUniversal) { + // Claim: in universal compaction none of the original data will remain + // once compactions settle. 
+ // + // Proof: The compensated size of the file containing the most tombstones + // is enough on its own to trigger size amp compaction. Size amp + // compaction is a full compaction, so all tombstones meet the obsolete + // keys they cover. + ASSERT_EQ(0, db_size[1]); + } else { + // Claim: in level compaction at most `db_size[0] / 2` of the original + // data will remain once compactions settle. + // + // Proof: Assume the original data is all in the bottom level. If it were + // not, it would meet its tombstone sooner. The original data size is + // large enough to require fanout to bottom level to be greater than + // `max_bytes_for_level_multiplier == 2`. In the level just above, + // tombstones must cover less than `db_size[0] / 4` bytes since fanout >= + // 2 and file size is compensated by doubling the size of values we expect + // are covered (`kDeletionWeightOnCompaction == 2`). The tombstones in + // levels above must cover less than `db_size[0] / 8` bytes of original + // data, `db_size[0] / 16`, and so on. + ASSERT_GT(db_size[0] / 2, db_size[1]); + } } } -#endif // ROCKSDB_VALGRIND_RUN +#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) TEST_P(DBCompactionTestWithParam, CompactionsPreserveDeletes) { // For each options type we test following @@ -343,7 +397,7 @@ const int kTestSize = kCDTKeysPerBuffer; std::vector values; for (int k = 0; k < kTestSize; ++k) { - values.push_back(RandomString(&rnd, kCDTValueSize)); + values.push_back(rnd.RandomString(kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } @@ -357,8 +411,9 @@ cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_TRUE( + dbfull()->CompactRange(cro, nullptr, nullptr).IsInvalidArgument()); // check that normal user iterator doesn't see anything Iterator* db_iter = dbfull()->NewIterator(ReadOptions()); @@ -366,6 +421,7 @@ for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { i++; } + ASSERT_OK(db_iter->status()); ASSERT_EQ(i, 0); delete db_iter; @@ -373,6 +429,7 @@ ReadOptions ro; ro.iter_start_seqnum=1; db_iter = dbfull()->NewIterator(ro); + ASSERT_OK(db_iter->status()); i = 0; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { i++; @@ -382,9 +439,10 @@ // now all deletes should be gone SetPreserveDeletesSequenceNumber(100000000); - dbfull()->CompactRange(cro, nullptr, nullptr); + ASSERT_NOK(dbfull()->CompactRange(cro, nullptr, nullptr)); db_iter = dbfull()->NewIterator(ro); + ASSERT_TRUE(db_iter->status().IsInvalidArgument()); i = 0; for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) { i++; @@ -408,7 +466,7 @@ const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { - values.push_back(RandomString(&rnd, kCDTValueSize)); + values.push_back(rnd.RandomString(kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } @@ -446,6 +504,10 @@ options.new_table_reader_for_compaction_inputs = true; options.max_open_files = 20; options.level0_file_num_compaction_trigger = 3; + // Avoid many shards with small max_open_files, where as little as + // two table insertions could lead to an LRU eviction, depending on + // hash values. 
+ options.table_cache_numshardbits = 2; DestroyAndReopen(options); Random rnd(301); @@ -470,8 +532,8 @@ ASSERT_OK(Put(Key(10 - k), "bar")); if (k < options.level0_file_num_compaction_trigger - 1) { num_table_cache_lookup = 0; - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // preloading iterator issues one table cache lookup and creates // a new table reader, if not preloaded. int old_num_table_cache_lookup = num_table_cache_lookup; @@ -489,8 +551,8 @@ num_table_cache_lookup = 0; num_new_table_reader = 0; - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Preloading iterator issues one table cache lookup and creates // a new table reader. One file is created for flush and one for compaction. // Compaction inputs make no table cache look-up for data/range deletion @@ -517,7 +579,7 @@ cro.change_level = true; cro.target_level = 2; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - db_->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); // Only verifying compaction outputs issues one table cache lookup // for both data block and range deletion block. // May preload table cache too. @@ -555,12 +617,12 @@ const int kTestSize = kCDTKeysPerBuffer * 512; std::vector<std::string> values; for (int k = 0; k < kTestSize; ++k) { - values.push_back(RandomString(&rnd, kCDTValueSize)); + values.push_back(rnd.RandomString(kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - db_size[0] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0])); Close(); // round 2 --- disable auto-compactions and issue deletions. @@ -571,11 +633,10 @@ for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } - db_size[1] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1])); Close(); - // as auto_compaction is off, we shouldn't see too much reduce - // in db size. - ASSERT_LT(db_size[0] / 3, db_size[1]); + // as auto_compaction is off, we shouldn't see any reduction in db size. + ASSERT_LE(db_size[0], db_size[1]); // round 3 --- reopen db with auto_compaction on and see if // deletion compensation still works. @@ -585,14 +646,86 @@ for (int k = 0; k < kTestSize / 10; ++k) { ASSERT_OK(Put(Key(k), values[k])); } - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - db_size[2] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[2])); // this time we're expecting significant drop in size. - ASSERT_GT(db_size[0] / 3, db_size[2]); + // + // See "CompactionDeletionTrigger" test for proof that at most + // `db_size[0] / 2` of the original data remains. In addition to that, this + // test inserts `db_size[0] / 10` to push the tombstones into SST files and + // then through automatic compactions. So in total `3 * db_size[0] / 5` of + // the original data may remain.
+ ASSERT_GT(3 * db_size[0] / 5, db_size[2]); } } +TEST_F(DBCompactionTest, CompactRangeBottomPri) { + ASSERT_OK(Put(Key(50), "")); + ASSERT_OK(Flush()); + ASSERT_OK(Put(Key(100), "")); + ASSERT_OK(Flush()); + ASSERT_OK(Put(Key(200), "")); + ASSERT_OK(Flush()); + + { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 2; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + } + ASSERT_EQ("0,0,3", FilesPerLevel(0)); + + ASSERT_OK(Put(Key(1), "")); + ASSERT_OK(Put(Key(199), "")); + ASSERT_OK(Flush()); + ASSERT_OK(Put(Key(2), "")); + ASSERT_OK(Put(Key(199), "")); + ASSERT_OK(Flush()); + ASSERT_EQ("2,0,3", FilesPerLevel(0)); + + // Now we have 2 L0 files, and 3 L2 files, and a manual compaction will + // be triggered. + // Two compaction jobs will run. One compacts 2 L0 files in Low Pri Pool + // and one compact to L2 in bottom pri pool. + int low_pri_count = 0; + int bottom_pri_count = 0; + SyncPoint::GetInstance()->SetCallBack( + "ThreadPoolImpl::Impl::BGThread:BeforeRun", [&](void* arg) { + Env::Priority* pri = reinterpret_cast(arg); + // First time is low pri pool in the test case. + if (low_pri_count == 0 && bottom_pri_count == 0) { + ASSERT_EQ(Env::Priority::LOW, *pri); + } + if (*pri == Env::Priority::LOW) { + low_pri_count++; + } else { + bottom_pri_count++; + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + env_->SetBackgroundThreads(1, Env::Priority::BOTTOM); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_EQ(1, low_pri_count); + ASSERT_EQ(1, bottom_pri_count); + ASSERT_EQ("0,0,2", FilesPerLevel(0)); + + // Recompact bottom most level uses bottom pool + CompactRangeOptions cro; + cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + ASSERT_EQ(1, low_pri_count); + ASSERT_EQ(2, bottom_pri_count); + + env_->SetBackgroundThreads(0, Env::Priority::BOTTOM); + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + // Low pri pool is used if bottom pool has size 0. + ASSERT_EQ(2, low_pri_count); + ASSERT_EQ(2, bottom_pri_count); + + SyncPoint::GetInstance()->DisableProcessing(); +} + TEST_F(DBCompactionTest, DisableStatsUpdateReopen) { uint64_t db_size[3]; for (int test = 0; test < 2; ++test) { @@ -607,12 +740,19 @@ const int kTestSize = kCDTKeysPerBuffer * 512; std::vector values; for (int k = 0; k < kTestSize; ++k) { - values.push_back(RandomString(&rnd, kCDTValueSize)); + values.push_back(rnd.RandomString(kCDTValueSize)); ASSERT_OK(Put(Key(k), values[k])); } - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - db_size[0] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + // L1 and L2 can fit deletions iff size compensation does not take effect, + // i.e., when `skip_stats_update_on_db_open == true`. Move any remaining + // files at or above L2 down to L3 to ensure obsolete data does not + // accidentally meet its tombstone above L3. This makes the final size more + // deterministic and easy to see whether size compensation for deletions + // took effect. + MoveFilesToLevel(3 /* level */); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[0])); Close(); // round 2 --- disable auto-compactions and issue deletions. 
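
The CompactRangeBottomPri hunk above exercises RocksDB's bottom-priority background pool: once threads are reserved with Env::SetBackgroundThreads(n, Env::Priority::BOTTOM), the bottommost pass of a manual compaction is scheduled there instead of the LOW pool, falling back to LOW when the BOTTOM pool is empty. A minimal caller-side sketch of that behavior, not part of this patch; the database path is illustrative:

    #include <cassert>
    #include "rocksdb/db.h"
    #include "rocksdb/env.h"
    #include "rocksdb/options.h"

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      // Reserve one thread for bottommost compactions; flushes and other
      // compactions keep using the default HIGH/LOW pools.
      options.env->SetBackgroundThreads(1, rocksdb::Env::Priority::BOTTOM);

      rocksdb::DB* db = nullptr;
      rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/bottom_pri_demo", &db);
      assert(s.ok());

      rocksdb::CompactRangeOptions cro;
      // Force the bottommost level to be rewritten; with the reservation
      // above that pass runs on the BOTTOM-priority thread.
      cro.bottommost_level_compaction =
          rocksdb::BottommostLevelCompaction::kForce;
      s = db->CompactRange(cro, nullptr, nullptr);
      assert(s.ok());

      delete db;
      return 0;
    }
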
@@ -625,27 +765,33 @@ for (int k = 0; k < kTestSize; ++k) { ASSERT_OK(Delete(Key(k))); } - db_size[1] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[1])); Close(); - // as auto_compaction is off, we shouldn't see too much reduce - // in db size. - ASSERT_LT(db_size[0] / 3, db_size[1]); + // as auto_compaction is off, we shouldn't see any reduction in db size. + ASSERT_LE(db_size[0], db_size[1]); // round 3 --- reopen db with auto_compaction on and see if // deletion compensation still works. options.disable_auto_compactions = false; Reopen(options); - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); - db_size[2] = Size(Key(0), Key(kTestSize - 1)); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_OK(Size(Key(0), Key(kTestSize - 1), &db_size[2])); if (options.skip_stats_update_on_db_open) { // If update stats on DB::Open is disabled, we don't expect // deletion entries to take effect. - ASSERT_LT(db_size[0] / 3, db_size[2]); + // + // The deletions are small enough to fit in L1 and L2, and obsolete keys + // were moved to L3+, so none of the original data should have been + // dropped. + ASSERT_LE(db_size[0], db_size[2]); } else { // Otherwise, we should see a significant drop in db size. - ASSERT_GT(db_size[0] / 3, db_size[2]); + // + // See "CompactionDeletionTrigger" test for proof that at most + // `db_size[0] / 2` of the original data remains. + ASSERT_GT(db_size[0] / 2, db_size[2]); } } } @@ -660,7 +806,8 @@ options.num_levels = 3; options.level0_file_num_compaction_trigger = 3; options.max_subcompactions = max_subcompactions_; - options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); + options.memtable_factory.reset( + test::NewSpecialSkipListFactory(kNumKeysPerFile)); CreateAndReopenWithCF({"pikachu"}, options); Random rnd(301); @@ -670,24 +817,24 @@ std::vector<std::string> values; // Write 100KB (100 values, each 1K) for (int i = 0; i < kNumKeysPerFile; i++) { - values.push_back(RandomString(&rnd, 990)); + values.push_back(rnd.RandomString(990)); ASSERT_OK(Put(1, Key(i), values[i])); } // put extra key to trigger flush ASSERT_OK(Put(1, "", "")); - dbfull()->TEST_WaitForFlushMemTable(handles_[1]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); ASSERT_EQ(NumTableFilesAtLevel(0, 1), num + 1); } // generate one more file in level-0, and should trigger level-0 compaction std::vector<std::string> values; for (int i = 0; i < kNumKeysPerFile; i++) { - values.push_back(RandomString(&rnd, 990)); + values.push_back(rnd.RandomString(990)); ASSERT_OK(Put(1, Key(i), values[i])); } // put extra key to trigger flush ASSERT_OK(Put(1, "", "")); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 1), 1); @@ -707,7 +854,8 @@ options.level0_slowdown_writes_trigger = 20; options.soft_pending_compaction_bytes_limit = 1 << 30; // Infinitely large options.max_background_compactions = 3; - options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); + options.memtable_factory.reset( + test::NewSpecialSkipListFactory(kNumKeysPerFile)); // Block all threads in thread pool.
const size_t kTotalTasks = 4; @@ -729,7 +877,7 @@ } // put extra key to trigger flush ASSERT_OK(Put(cf, "", "")); - dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); } } @@ -746,7 +894,7 @@ } // put extra key to trigger flush ASSERT_OK(Put(2, "", "")); - dbfull()->TEST_WaitForFlushMemTable(handles_[2]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2])); ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1, NumTableFilesAtLevel(0, 2)); } @@ -757,7 +905,7 @@ sleeping_tasks[i].WakeUp(); sleeping_tasks[i].WaitUntilDone(); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Verify number of compactions allowed will come back to 1. @@ -774,7 +922,7 @@ } // put extra key to trigger flush ASSERT_OK(Put(cf, "", "")); - dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); ASSERT_EQ(NumTableFilesAtLevel(0, cf), num + 1); } } @@ -801,14 +949,14 @@ ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); std::vector values; for (int i = 0; i < 80; i++) { - values.push_back(RandomString(&rnd, 100000)); + values.push_back(rnd.RandomString(100000)); ASSERT_OK(Put(1, Key(i), values[i])); } // Reopening moves updates to level-0 ReopenWithColumnFamilies({"default", "pikachu"}, options); - dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], - true /* disallow trivial move */); + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1], + true /* disallow trivial move */)); ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0); ASSERT_GT(NumTableFilesAtLevel(1, 1), 1); @@ -852,27 +1000,27 @@ DestroyAndReopen(options); // create first file and flush to l0 - Put("4", "A"); - Put("3", "A"); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); - - Put("2", "A"); - Delete("3"); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Put("4", "A")); + ASSERT_OK(Put("3", "A")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + ASSERT_OK(Put("2", "A")); + ASSERT_OK(Delete("3")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ("NOT_FOUND", Get("3")); // move both files down to l1 - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ("NOT_FOUND", Get("3")); for (int i = 0; i < 3; i++) { - Put("2", "B"); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Put("2", "B")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("NOT_FOUND", Get("3")); } @@ -885,31 +1033,85 @@ DestroyAndReopen(options); // create first file and flush to l0 - Put("4", "A"); - Put("3", "A"); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); - - Put("2", "A"); - SingleDelete("3"); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Put("4", "A")); + ASSERT_OK(Put("3", "A")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + ASSERT_OK(Put("2", "A")); + ASSERT_OK(SingleDelete("3")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ("NOT_FOUND", Get("3")); // move both files down to l1 - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ("NOT_FOUND", 
Get("3")); for (int i = 0; i < 3; i++) { - Put("2", "B"); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Put("2", "B")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("NOT_FOUND", Get("3")); } +TEST_F(DBCompactionTest, CompactionSstPartitioner) { + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleLevel; + options.level0_file_num_compaction_trigger = 3; + std::shared_ptr factory( + NewSstPartitionerFixedPrefixFactory(4)); + options.sst_partitioner_factory = factory; + + DestroyAndReopen(options); + + // create first file and flush to l0 + ASSERT_OK(Put("aaaa1", "A")); + ASSERT_OK(Put("bbbb1", "B")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + ASSERT_OK(Put("aaaa1", "A2")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + // move both files down to l1 + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + + std::vector files; + dbfull()->GetLiveFilesMetaData(&files); + ASSERT_EQ(2, files.size()); + ASSERT_EQ("A2", Get("aaaa1")); + ASSERT_EQ("B", Get("bbbb1")); +} + +TEST_F(DBCompactionTest, CompactionSstPartitionerNonTrivial) { + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleLevel; + options.level0_file_num_compaction_trigger = 1; + std::shared_ptr factory( + NewSstPartitionerFixedPrefixFactory(4)); + options.sst_partitioner_factory = factory; + + DestroyAndReopen(options); + + // create first file and flush to l0 + ASSERT_OK(Put("aaaa1", "A")); + ASSERT_OK(Put("bbbb1", "B")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); + + std::vector files; + dbfull()->GetLiveFilesMetaData(&files); + ASSERT_EQ(2, files.size()); + ASSERT_EQ("A", Get("aaaa1")); + ASSERT_EQ("B", Get("bbbb1")); +} + TEST_F(DBCompactionTest, ZeroSeqIdCompaction) { Options options = CurrentOptions(); options.compaction_style = kCompactionStyleLevel; @@ -931,22 +1133,23 @@ // create first file and flush to l0 for (auto& key : {"1", "2", "3", "3", "3", "3"}) { - Put(key, std::string(key_len, 'A')); + ASSERT_OK(Put(key, std::string(key_len, 'A'))); snaps.push_back(dbfull()->GetSnapshot()); } - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); // create second file and flush to l0 for (auto& key : {"3", "4", "5", "6", "7", "8"}) { - Put(key, std::string(key_len, 'A')); + ASSERT_OK(Put(key, std::string(key_len, 'A'))); snaps.push_back(dbfull()->GetSnapshot()); } - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); // move both files down to l1 - dbfull()->CompactFiles(compact_opt, collector->GetFlushedFiles(), 1); + ASSERT_OK( + dbfull()->CompactFiles(compact_opt, collector->GetFlushedFiles(), 1)); // release snap so that first instance of key(3) can have seqId=0 for (auto snap : snaps) { @@ -955,12 +1158,12 @@ // create 3 files in l0 so to trigger compaction for (int i = 0; i < options.level0_file_num_compaction_trigger; i++) { - Put("2", std::string(1, 'A')); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Put("2", std::string(1, 'A'))); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); 
ASSERT_OK(Put("", "")); } @@ -975,12 +1178,12 @@ for (int i = 0; i < 2; ++i) { for (int j = 0; j < options.level0_file_num_compaction_trigger; j++) { // make l0 files' ranges overlap to avoid trivial move - Put(std::to_string(2 * i), std::string(1, 'A')); - Put(std::to_string(2 * i + 1), std::string(1, 'A')); - Flush(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(Put(std::to_string(2 * i), std::string(1, 'A'))); + ASSERT_OK(Put(std::to_string(2 * i + 1), std::string(1, 'A'))); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 0), i + 1); } @@ -996,7 +1199,7 @@ // note CompactionOptions::output_file_size_limit is unset. CompactionOptions compact_opt; compact_opt.compression = kNoCompression; - dbfull()->CompactFiles(compact_opt, input_filenames, 1); + ASSERT_OK(dbfull()->CompactFiles(compact_opt, input_filenames, 1)); } // Check that writes done during a memtable compaction are recovered @@ -1039,7 +1242,7 @@ Random rnd(301); std::vector values; for (int i = 0; i < num_keys; i++) { - values.push_back(RandomString(&rnd, value_size)); + values.push_back(rnd.RandomString(value_size)); ASSERT_OK(Put(Key(i), values[i])); } @@ -1057,7 +1260,7 @@ cro.exclusive_manual_compaction = exclusive_manual_compaction_; // Compaction will initiate a trivial move from L0 to L1 - dbfull()->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); // File moved From L0 to L1 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); // 0 files in L0 @@ -1111,7 +1314,7 @@ std::map values; for (size_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { - values[j] = RandomString(&rnd, value_size); + values[j] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(j), values[j])); } ASSERT_OK(Flush()); @@ -1126,7 +1329,7 @@ // Since data is non-overlapping we expect compaction to initiate // a trivial move - db_->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); // We expect that all the files were trivially moved from L0 to L1 ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0); ASSERT_EQ(NumTableFilesAtLevel(1, 0) /* level1_files */, level0_files); @@ -1157,13 +1360,13 @@ }; for (size_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { - values[j] = RandomString(&rnd, value_size); + values[j] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(j), values[j])); } ASSERT_OK(Flush()); } - db_->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); for (size_t i = 0; i < ranges.size(); i++) { for (int32_t j = ranges[i].first; j <= ranges[i].second; j++) { @@ -1202,14 +1405,14 @@ // file 1 [0 => 300] for (int32_t i = 0; i <= 300; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 2 [600 => 700] for (int32_t i = 600; i <= 700; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1283,14 +1486,14 @@ // file 1 [0 => 100] for (int32_t i = 0; i < 100; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 2 [100 => 300] for (int32_t i = 100; i < 
300; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1311,7 +1514,7 @@ // file 3 [ 0 => 200] for (int32_t i = 0; i < 200; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1343,21 +1546,21 @@ TEST_SYNC_POINT("DBCompaction::ManualPartial:1"); // file 4 [300 => 400) for (int32_t i = 300; i <= 400; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 5 [400 => 500) for (int32_t i = 400; i <= 500; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 6 [500 => 600) for (int32_t i = 500; i <= 600; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } // Second non-trivial compaction is triggered @@ -1367,8 +1570,8 @@ ASSERT_EQ("3,0,0,0,0,1,2", FilesPerLevel(0)); TEST_SYNC_POINT("DBCompaction::ManualPartial:5"); - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // After two non-trivial compactions are installed, there is 1 file in L6, and // 1 file in L1 ASSERT_EQ("0,1,0,0,0,0,1", FilesPerLevel(0)); @@ -1425,14 +1628,14 @@ // file 1 [0 => 100] for (int32_t i = 0; i < 100; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); // file 2 [100 => 300] for (int32_t i = 100; i < 300; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1451,7 +1654,7 @@ // file 3 [ 0 => 200] for (int32_t i = 0; i < 200; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1481,9 +1684,9 @@ for (int32_t j = 300; j < 4300; j++) { if (j == 2300) { ASSERT_OK(Flush()); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - values[j] = RandomString(&rnd, value_size); + values[j] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(j), values[j])); } } @@ -1497,8 +1700,8 @@ } TEST_SYNC_POINT("DBCompaction::PartialFill:2"); - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); threads.join(); for (int32_t i = 0; i < 4300; i++) { @@ -1516,12 +1719,12 @@ Options options = CurrentOptions(); options.unordered_write = true; DestroyAndReopen(options); - Put("foo", "v1"); + ASSERT_OK(Put("foo", "v1")); ASSERT_OK(Flush()); - Put("bar", "v1"); + ASSERT_OK(Put("bar", "v1")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - port::Thread writer([&]() { Put("foo", "v2"); }); + port::Thread writer([&]() { ASSERT_OK(Put("foo", "v2")); }); TEST_SYNC_POINT( "DBCompactionTest::ManualCompactionWithUnorderedWrite:WaitWriteWAL"); @@ -1554,14 +1757,14 @@ // file 1 [0 => 100] for (int32_t i = 0; i < 100; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } 
ASSERT_OK(Flush()); // file 2 [100 => 300] for (int32_t i = 100; i < 300; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1577,7 +1780,7 @@ // file 3 [ 0 => 200] for (int32_t i = 0; i < 200; i++) { - values[i] = RandomString(&rnd, value_size); + values[i] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1587,15 +1790,15 @@ for (int32_t j = 300; j < 4300; j++) { if (j == 2300) { ASSERT_OK(Flush()); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - values[j] = RandomString(&rnd, value_size); + values[j] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(j), values[j])); } } ASSERT_OK(Flush()); - dbfull()->TEST_WaitForFlushMemTable(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Verify level sizes uint64_t target_size = 4 * options.max_bytes_for_level_base; @@ -1605,7 +1808,7 @@ options.max_bytes_for_level_multiplier); } - size_t old_num_files = CountFiles(); + const size_t old_num_files = CountFiles(); std::string begin_string = Key(1000); std::string end_string = Key(2000); Slice begin(begin_string); @@ -1640,7 +1843,7 @@ compact_options.change_level = true; compact_options.target_level = 1; ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr)); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_OK( DeleteFilesInRange(db_, db_->DefaultColumnFamily(), nullptr, nullptr)); @@ -1649,12 +1852,11 @@ for (int32_t i = 0; i < 4300; i++) { ReadOptions roptions; std::string result; - Status s = db_->Get(roptions, Key(i), &result); - ASSERT_TRUE(s.IsNotFound()); + ASSERT_TRUE(db_->Get(roptions, Key(i), &result).IsNotFound()); deleted_count2++; } ASSERT_GT(deleted_count2, deleted_count); - size_t new_num_files = CountFiles(); + const size_t new_num_files = CountFiles(); ASSERT_GT(old_num_files, new_num_files); } @@ -1676,7 +1878,7 @@ for (auto i = 0; i < 10; i++) { for (auto j = 0; j < 100; j++) { auto k = i * 100 + j; - values[k] = RandomString(&rnd, value_size); + values[k] = rnd.RandomString(value_size); ASSERT_OK(Put(Key(k), values[k])); } ASSERT_OK(Flush()); @@ -1808,15 +2010,15 @@ // would cause `1 -> vals[0]` (an older key) to reappear. std::string vals[kNumL0Files]; for (int i = 0; i < kNumL0Files; ++i) { - vals[i] = RandomString(&rnd, kValSize); - Put(Key(i), vals[i]); - Put(Key(i + 1), vals[i]); - Flush(); + vals[i] = rnd.RandomString(kValSize); + ASSERT_OK(Put(Key(i), vals[i])); + ASSERT_OK(Put(Key(i + 1), vals[i])); + ASSERT_OK(Flush()); if (i == 0) { snapshot = db_->GetSnapshot(); } } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Verify `DeleteFilesInRange` can't drop only file 0 which would cause // "1 -> vals[0]" to reappear. 
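Two mechanical substitutions recur throughout these hunks: the free test helper RandomString(&rnd, n) becomes the Random::RandomString(n) member function, and calls whose returned Status used to be silently dropped are wrapped in ASSERT_OK so a failure stops the test at the offending call. A condensed gtest-style sketch of the pattern (header paths assumed from the RocksDB source tree):

#include "test_util/testharness.h"
#include "util/random.h"

TEST(RandomStringMigration, MemberReplacesFreeHelper) {
  ROCKSDB_NAMESPACE::Random rnd(301);
  // Before: std::string v = RandomString(&rnd, 1024);   // free helper
  // After:  the generator is a member of Random itself.
  std::string v = rnd.RandomString(1024);
  ASSERT_EQ(1024u, v.size());
}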
@@ -1850,7 +2052,7 @@ std::vector values; // File with keys [ 0 => 99 ] for (int i = 0; i < 100; i++) { - values.push_back(RandomString(&rnd, value_size)); + values.push_back(rnd.RandomString(value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1868,7 +2070,7 @@ // File with keys [ 100 => 199 ] for (int i = 100; i < 200; i++) { - values.push_back(RandomString(&rnd, value_size)); + values.push_back(rnd.RandomString(value_size)); ASSERT_OK(Put(Key(i), values[i])); } ASSERT_OK(Flush()); @@ -1895,7 +2097,7 @@ options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.memtable_factory.reset( - new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); + test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; @@ -1903,16 +2105,8 @@ options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; - // options = CurrentOptions(options); - std::vector filenames; - env_->GetChildren(options.db_paths[1].path, &filenames); - // Delete archival files. - for (size_t i = 0; i < filenames.size(); ++i) { - env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); - } - env_->DeleteDir(options.db_paths[1].path); - Reopen(options); + DestroyAndReopen(options); Random rnd(301); int key_idx = 0; @@ -2012,7 +2206,7 @@ options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.memtable_factory.reset( - new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); + test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; @@ -2020,16 +2214,8 @@ options.num_levels = 4; options.max_bytes_for_level_base = 400 * 1024; options.max_subcompactions = max_subcompactions_; - // options = CurrentOptions(options); - std::vector filenames; - env_->GetChildren(options.db_paths[1].path, &filenames); - // Delete archival files. - for (size_t i = 0; i < filenames.size(); ++i) { - env_->DeleteFile(options.db_paths[1].path + "/" + filenames[i]); - } - env_->DeleteDir(options.db_paths[1].path); - Reopen(options); + DestroyAndReopen(options); Random rnd(301); int key_idx = 0; @@ -2130,7 +2316,7 @@ options.db_paths.emplace_back(dbname_ + "_2", 4 * 1024 * 1024); options.db_paths.emplace_back(dbname_ + "_3", 1024 * 1024 * 1024); options.memtable_factory.reset( - new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); + test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; @@ -2149,7 +2335,7 @@ option_vector.emplace_back(DBOptions(options), cf_opt1); CreateColumnFamilies({"one"},option_vector[1]); - // Configura CF2 specific paths. + // Configure CF2 specific paths. cf_opt2.cf_paths.emplace_back(dbname_ + "cf2", 500 * 1024); cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_2", 4 * 1024 * 1024); cf_opt2.cf_paths.emplace_back(dbname_ + "cf2_3", 1024 * 1024 * 1024); @@ -2204,13 +2390,16 @@ // Check that default column family uses db_paths. // And Column family "one" uses cf_paths. - // First three 110KB files are not going to second path. 
- // After that, (100K, 200K) + // The compaction in level0 outputs the sst files in level1. + // The first path cannot hold level1's data (400KB+400KB > 500KB), + // so every compaction moves an sst file to the second path. Please + // refer to LevelCompactionBuilder::GetPathId. for (int num = 0; num < 3; num++) { generate_file(); } + check_sstfilecount(0, 1); + check_sstfilecount(1, 2); - // Another 110KB triggers a compaction to 400K file to fill up first path generate_file(); check_sstfilecount(1, 3); @@ -2263,10 +2452,10 @@ for (int i = 0; i <= max_key_level_insert; i++) { // each value is 10K - ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); + ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000))); } ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_GT(TotalTableFiles(1, 4), 1); int non_level0_num_files = 0; @@ -2302,7 +2491,8 @@ compact_options.bottommost_level_compaction = BottommostLevelCompaction::kForce; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr); + ASSERT_OK( + dbfull()->CompactRange(compact_options, handles_[1], nullptr, nullptr)); // Only 1 file in L0 ASSERT_EQ("1", FilesPerLevel(1)); @@ -2321,11 +2511,11 @@ ReopenWithColumnFamilies({"default", "pikachu"}, options); for (int i = max_key_level_insert / 2; i <= max_key_universal_insert; i++) { - ASSERT_OK(Put(1, Key(i), RandomString(&rnd, 10000))); + ASSERT_OK(Put(1, Key(i), rnd.RandomString(10000))); } - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); ASSERT_OK(Flush(1)); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); for (int i = 1; i < options.num_levels; i++) { ASSERT_EQ(NumTableFilesAtLevel(i, 1), 0); @@ -2335,6 +2525,7 @@ // compaction style std::string keys_in_db; Iterator* iter = dbfull()->NewIterator(ReadOptions(), handles_[1]); + ASSERT_OK(iter->status()); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { keys_in_db.append(iter->key().ToString()); keys_in_db.push_back(','); @@ -2372,24 +2563,24 @@ TEST_F(DBCompactionTest, L0_CompactionBug_Issue44_b) { do { CreateAndReopenWithCF({"pikachu"}, CurrentOptions()); - Put(1, "", ""); + ASSERT_OK(Put(1, "", "")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - Delete(1, "e"); - Put(1, "", ""); + ASSERT_OK(Delete(1, "e")); + ASSERT_OK(Put(1, "", "")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - Put(1, "c", "cv"); + ASSERT_OK(Put(1, "c", "cv")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - Put(1, "", ""); + ASSERT_OK(Put(1, "", "")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - Put(1, "", ""); + ASSERT_OK(Put(1, "", "")); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - Put(1, "d", "dv"); + ASSERT_OK(Put(1, "d", "dv")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - Put(1, "", ""); + ASSERT_OK(Put(1, "", "")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); - Delete(1, "d"); - Delete(1, "b"); + ASSERT_OK(Delete(1, "d")); + ASSERT_OK(Delete(1, "b")); ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions()); ASSERT_EQ("(->)(c->cv)", Contents(1)); env_->SleepForMicroseconds(1000000); // Wait for compaction to finish @@ -2406,34 +2597,35 @@ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - Put(1,
"foo", ""); - Put(1, "bar", ""); - Flush(1); - Put(1, "foo", ""); - Put(1, "bar", ""); + ASSERT_OK(Put(1, "foo", "")); + ASSERT_OK(Put(1, "bar", "")); + ASSERT_OK(Flush(1)); + ASSERT_OK(Put(1, "foo", "")); + ASSERT_OK(Put(1, "bar", "")); // Generate four files in CF 0, which should trigger an auto compaction - Put("foo", ""); - Put("bar", ""); - Flush(); - Put("foo", ""); - Put("bar", ""); - Flush(); - Put("foo", ""); - Put("bar", ""); - Flush(); - Put("foo", ""); - Put("bar", ""); - Flush(); + ASSERT_OK(Put("foo", "")); + ASSERT_OK(Put("bar", "")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("foo", "")); + ASSERT_OK(Put("bar", "")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("foo", "")); + ASSERT_OK(Put("bar", "")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("foo", "")); + ASSERT_OK(Put("bar", "")); + ASSERT_OK(Flush()); // The auto compaction is scheduled but waited until here TEST_SYNC_POINT("DBCompactionTest::ManualAutoRace:1"); // The auto compaction will wait until the manual compaction is registerd // before processing so that it will be cancelled. - dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, + nullptr)); ASSERT_EQ("0,1", FilesPerLevel(1)); // Eventually the cancelled compaction will be rescheduled and executed. - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,1", FilesPerLevel(0)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } @@ -2459,7 +2651,7 @@ ASSERT_EQ("1,1,1", FilesPerLevel(1)); // Compaction range overlaps files - Compact(1, "p1", "p9"); + Compact(1, "p", "q"); ASSERT_EQ("0,0,1", FilesPerLevel(1)); // Populate a different range @@ -2478,7 +2670,7 @@ options.statistics->getTickerCount(BLOCK_CACHE_ADD); CompactRangeOptions cro; cro.exclusive_manual_compaction = exclusive_manual_compaction_; - db_->CompactRange(cro, handles_[1], nullptr, nullptr); + ASSERT_OK(db_->CompactRange(cro, handles_[1], nullptr, nullptr)); // Verify manual compaction doesn't fill block cache ASSERT_EQ(prev_block_cache_add, options.statistics->getTickerCount(BLOCK_CACHE_ADD)); @@ -2526,7 +2718,7 @@ ASSERT_EQ("3", FilesPerLevel(1)); // Compaction range overlaps files - Compact(1, "p1", "p9", 1); + Compact(1, "p", "q", 1); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,1", FilesPerLevel(1)); ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); @@ -2559,7 +2751,8 @@ CompactRangeOptions compact_options; compact_options.target_path_id = 1; compact_options.exclusive_manual_compaction = exclusive_manual_compaction_; - db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); + ASSERT_OK( + db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,1", FilesPerLevel(1)); @@ -2616,10 +2809,10 @@ Random rnd(301); for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) { - ASSERT_OK(Put(1, ToString(key), RandomString(&rnd, kTestValueSize))); + ASSERT_OK(Put(1, ToString(key), rnd.RandomString(kTestValueSize))); } - dbfull()->TEST_WaitForFlushMemTable(handles_[1]); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1])); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ColumnFamilyMetaData cf_meta; dbfull()->GetColumnFamilyMetaData(handles_[1], &cf_meta); @@ -2692,13 +2885,13 @@ std::vector keys; std::vector values; for (int k = 0; k < kNumInsertedKeys; ++k) { - keys.emplace_back(RandomString(&rnd, kKeySize)); - 
values.emplace_back(RandomString(&rnd, kKvSize - kKeySize)); + keys.emplace_back(rnd.RandomString(kKeySize)); + values.emplace_back(rnd.RandomString(kKvSize - kKeySize)); ASSERT_OK(Put(Slice(keys[k]), Slice(values[k]))); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - dbfull()->TEST_FlushMemTable(true); + ASSERT_OK(dbfull()->TEST_FlushMemTable(true)); // Make sure the number of L0 files can trigger compaction. ASSERT_GE(NumTableFilesAtLevel(0), options.level0_file_num_compaction_trigger); @@ -2759,12 +2952,12 @@ for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j), RandomString(&rnd, 10 * 1024))); + ASSERT_OK(Put(Key(i * 50 + j), rnd.RandomString(10 * 1024))); } ASSERT_OK(Flush()); } // this should execute L0->L1 - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,1", FilesPerLevel(0)); // block compactions @@ -2781,7 +2974,7 @@ sleeping_task.WaitUntilDone(); // this should execute L1->L2 (move) - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,0,1", FilesPerLevel(0)); @@ -2794,12 +2987,12 @@ for (int i = 0; i < 2; ++i) { // Create 1MB sst file for (int j = 0; j < 100; ++j) { - ASSERT_OK(Put(Key(i * 50 + j + 100), RandomString(&rnd, 10 * 1024))); + ASSERT_OK(Put(Key(i * 50 + j + 100), rnd.RandomString(10 * 1024))); } ASSERT_OK(Flush()); } // this should execute both L0->L1 and L1->L2 (merge with previous file) - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("0,0,2", FilesPerLevel(0)); @@ -2807,6 +3000,7 @@ ASSERT_OK(env_->FileExists(dbname_ + moved_file_name)); listener->SetExpectedFileName(dbname_ + moved_file_name); + ASSERT_OK(iterator->status()); iterator.reset(); // this file should have been compacted away @@ -2821,7 +3015,7 @@ } Options options = CurrentOptions(); options.memtable_factory.reset( - new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); + test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); options.compaction_style = kCompactionStyleLevel; options.write_buffer_size = 110 << 10; // 110KB options.arena_block_size = 4 << 10; @@ -2969,7 +3163,7 @@ for (int num = 0; num < 10; num++) { GenerateNewRandomFile(&rnd); } - db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"CompactionJob::Run():Start", @@ -2990,7 +3184,7 @@ "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:1"); GenerateNewRandomFile(&rnd, /* nowait */ true); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr)); for (int num = 0; num < options.level0_file_num_compaction_trigger + 1; num++) { @@ -3000,7 +3194,7 @@ TEST_SYNC_POINT( "DBCompactionTest::SuggestCompactRangeNoTwoLevel0Compactions:2"); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); } static std::string ShortKey(int i) { @@ -3052,7 +3246,7 @@ std::vector values; // File with keys [ 0 => 99 ] for (int i = 0; i < 100; i++) { - values.push_back(RandomString(&rnd, value_size)); + values.push_back(rnd.RandomString(value_size)); ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); @@ -3069,7 +3263,7 @@ // File with keys [ 100 => 199 ] for (int i = 100; i < 200; i++) { - values.push_back(RandomString(&rnd, 
value_size)); + values.push_back(rnd.RandomString(value_size)); ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); @@ -3087,7 +3281,7 @@ // File with keys [ 200 => 299 ] for (int i = 200; i < 300; i++) { - values.push_back(RandomString(&rnd, value_size)); + values.push_back(rnd.RandomString(value_size)); ASSERT_OK(Put(ShortKey(i), values[i])); } ASSERT_OK(Flush()); @@ -3118,14 +3312,28 @@ options.level0_file_num_compaction_trigger = 5; options.max_background_compactions = 2; options.max_subcompactions = max_subcompactions_; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + options.write_buffer_size = 2 << 20; // 2MB + + BlockBasedTableOptions table_options; + table_options.block_cache = NewLRUCache(64 << 20); // 64MB + table_options.cache_index_and_filter_blocks = true; + table_options.pin_l0_filter_and_index_blocks_in_cache = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); const size_t kValueSize = 1 << 20; Random rnd(301); - std::string value(RandomString(&rnd, kValueSize)); + std::string value(rnd.RandomString(kValueSize)); + // The L0->L1 must be picked before we begin flushing files to trigger + // intra-L0 compaction, and must not finish until after an intra-L0 + // compaction has been picked. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"LevelCompactionPicker::PickCompactionBySize:0", + {{"LevelCompactionPicker::PickCompaction:Return", + "DBCompactionTest::IntraL0Compaction:L0ToL1Ready"}, + {"LevelCompactionPicker::PickCompactionBySize:0", "CompactionJob::Run():Start"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); @@ -3143,13 +3351,14 @@ for (int i = 0; i < 10; ++i) { ASSERT_OK(Put(Key(0), "")); // prevents trivial move if (i == 5) { + TEST_SYNC_POINT("DBCompactionTest::IntraL0Compaction:L0ToL1Ready"); ASSERT_OK(Put(Key(i + 1), value + value)); } else { ASSERT_OK(Put(Key(i + 1), value)); } ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); std::vector> level_to_files; @@ -3162,6 +3371,16 @@ for (int i = 0; i < 2; ++i) { ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21); } + + // The index/filter in the file produced by intra-L0 should not be pinned. + // That means clearing unref'd entries in block cache and re-accessing the + // file produced by intra-L0 should bump the index block miss count. + uint64_t prev_index_misses = + TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS); + table_options.block_cache->EraseUnRefEntries(); + ASSERT_EQ("", Get(Key(0))); + ASSERT_EQ(prev_index_misses + 1, + TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS)); } TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) { @@ -3176,10 +3395,16 @@ const size_t kValueSize = 1 << 20; Random rnd(301); - std::string value(RandomString(&rnd, kValueSize)); + std::string value(rnd.RandomString(kValueSize)); + // The L0->L1 must be picked before we begin flushing files to trigger + // intra-L0 compaction, and must not finish until after an intra-L0 + // compaction has been picked. 
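The ordering comment above (and its twin just below) is enforced with sync points: LoadDependency takes {predecessor, successor} pairs, and a thread reaching TEST_SYNC_POINT(successor) blocks until some thread has passed the predecessor point. A self-contained sketch of the idiom with invented point names (test-only machinery from test_util/sync_point.h, compiled out in release builds):

#include <thread>
#include "test_util/sync_point.h"
#include "test_util/testharness.h"

TEST(SyncPointSketch, OrdersTwoThreads) {
  auto* sp = ROCKSDB_NAMESPACE::SyncPoint::GetInstance();
  sp->LoadDependency({{"Demo:A", "Demo:B"}});
  sp->EnableProcessing();

  int stage = 0;
  int observed = -1;
  std::thread waiter([&]() {
    TEST_SYNC_POINT("Demo:B");  // blocks until "Demo:A" has been passed
    observed = stage;           // ordered after the write below by the edge
  });
  stage = 1;
  TEST_SYNC_POINT("Demo:A");
  waiter.join();
  ASSERT_EQ(1, observed);

  sp->DisableProcessing();
  sp->ClearTrace();
}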
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"LevelCompactionPicker::PickCompactionBySize:0", + {{"LevelCompactionPicker::PickCompaction:Return", + "DBCompactionTest::IntraL0CompactionDoesNotObsoleteDeletions:" + "L0ToL1Ready"}, + {"LevelCompactionPicker::PickCompactionBySize:0", "CompactionJob::Run():Start"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); @@ -3203,10 +3428,15 @@ } else { ASSERT_OK(Delete(Key(0))); } + if (i == 5) { + TEST_SYNC_POINT( + "DBCompactionTest::IntraL0CompactionDoesNotObsoleteDeletions:" + "L0ToL1Ready"); + } ASSERT_OK(Put(Key(i + 1), value)); ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); std::vector> level_to_files; @@ -3254,7 +3484,7 @@ int key_idx = 0; GenerateNewFile(&rnd, &key_idx); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(1, num_bottom_pri_compactions); @@ -3278,8 +3508,8 @@ // So key 0, 2, and 4+ fall outside these levels' key-ranges. for (int level = 2; level >= 1; --level) { for (int i = 0; i < 2; ++i) { - Put(Key(2 * i + 1), "val"); - Flush(); + ASSERT_OK(Put(Key(2 * i + 1), "val")); + ASSERT_OK(Flush()); } MoveFilesToLevel(level); ASSERT_EQ(2, NumTableFilesAtLevel(level)); @@ -3289,11 +3519,11 @@ // - Tombstones for keys 2 and 4 can be dropped early. // - Tombstones for keys 1 and 3 must be kept due to L2 files' key-ranges. for (int i = 0; i < kNumL0Files; ++i) { - Put(Key(0), "val"); // sentinel to prevent trivial move - Delete(Key(i + 1)); - Flush(); + ASSERT_OK(Put(Key(0), "val")); // sentinel to prevent trivial move + ASSERT_OK(Delete(Key(i + 1))); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); for (int i = 0; i < kNumL0Files; ++i) { std::string value; @@ -3357,10 +3587,10 @@ TEST_F(DBCompactionTest, CompactFilesOverlapInL0Bug) { // Regression test for bug of not pulling in L0 files that overlap the user- // specified input files in time- and key-ranges. 
- Put(Key(0), "old_val"); - Flush(); - Put(Key(0), "new_val"); - Flush(); + ASSERT_OK(Put(Key(0), "old_val")); + ASSERT_OK(Flush()); + ASSERT_OK(Put(Key(0), "new_val")); + ASSERT_OK(Flush()); ColumnFamilyMetaData cf_meta; dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta); @@ -3376,6 +3606,41 @@ ASSERT_EQ("new_val", Get(Key(0))); } +TEST_F(DBCompactionTest, DeleteFilesInRangeConflictWithCompaction) { + Options options = CurrentOptions(); + DestroyAndReopen(options); + const Snapshot* snapshot = nullptr; + const int kMaxKey = 10; + + for (int i = 0; i < kMaxKey; i++) { + ASSERT_OK(Put(Key(i), Key(i))); + ASSERT_OK(Delete(Key(i))); + if (!snapshot) { + snapshot = db_->GetSnapshot(); + } + } + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + ASSERT_OK(Put(Key(kMaxKey), Key(kMaxKey))); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + // test DeleteFilesInRange() deletes the files already picked for compaction + SyncPoint::GetInstance()->LoadDependency( + {{"VersionSet::LogAndApply:WriteManifestStart", + "BackgroundCallCompaction:0"}, + {"DBImpl::BackgroundCompaction:Finish", + "VersionSet::LogAndApply:WriteManifestDone"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + // release snapshot which mark bottommost file for compaction + db_->ReleaseSnapshot(snapshot); + std::string begin_string = Key(0); + std::string end_string = Key(kMaxKey + 1); + Slice begin(begin_string); + Slice end(end_string); + ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end)); + SyncPoint::GetInstance()->DisableProcessing(); +} + TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) { // bottom-level files may contain deletions due to snapshots protecting the // deleted keys. Once the snapshot is released, we should see files with many @@ -3395,7 +3660,7 @@ for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); } if (i == kNumLevelFiles - 1) { snapshot = db_->GetSnapshot(); @@ -3406,12 +3671,12 @@ ASSERT_OK(Delete(Key(j))); } } - Flush(); + ASSERT_OK(Flush()); if (i < kNumLevelFiles - 1) { ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); } } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1)); std::vector pre_release_metadata, post_release_metadata; @@ -3432,7 +3697,7 @@ CompactionReason::kBottommostFiles); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); db_->GetLiveFilesMetaData(&post_release_metadata); ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size()); @@ -3448,6 +3713,76 @@ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(DBCompactionTest, NoCompactBottomLevelFilesWithDeletions) { + // bottom-level files may contain deletions due to snapshots protecting the + // deleted keys. Once the snapshot is released, we should see files with many + // such deletions undergo single-file compactions. But when disabling auto + // compactions, it shouldn't be triggered which may causing too many + // background jobs. 
+ const int kNumKeysPerFile = 1024; + const int kNumLevelFiles = 4; + const int kValueSize = 128; + Options options = CurrentOptions(); + options.compression = kNoCompression; + options.disable_auto_compactions = true; + options.level0_file_num_compaction_trigger = kNumLevelFiles; + // inflate it a bit to account for key/metadata overhead + options.target_file_size_base = 120 * kNumKeysPerFile * kValueSize / 100; + Reopen(options); + + Random rnd(301); + const Snapshot* snapshot = nullptr; + for (int i = 0; i < kNumLevelFiles; ++i) { + for (int j = 0; j < kNumKeysPerFile; ++j) { + ASSERT_OK( + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); + } + if (i == kNumLevelFiles - 1) { + snapshot = db_->GetSnapshot(); + // delete every other key after grabbing a snapshot, so these deletions + // and the keys they cover can't be dropped until after the snapshot is + // released. + for (int j = 0; j < kNumLevelFiles * kNumKeysPerFile; j += 2) { + ASSERT_OK(Delete(Key(j))); + } + } + ASSERT_OK(Flush()); + if (i < kNumLevelFiles - 1) { + ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); + } + } + ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr)); + ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1)); + + std::vector<LiveFileMetaData> pre_release_metadata, post_release_metadata; + db_->GetLiveFilesMetaData(&pre_release_metadata); + // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST + // files does not need to be preserved in case of a future snapshot. + ASSERT_OK(Put(Key(0), "val")); + + // release snapshot and no compaction should be triggered. + std::atomic<int> num_compactions{0}; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:Start", + [&](void* /*arg*/) { num_compactions.fetch_add(1); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + db_->ReleaseSnapshot(snapshot); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(0, num_compactions); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + + db_->GetLiveFilesMetaData(&post_release_metadata); + ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size()); + for (size_t i = 0; i < pre_release_metadata.size(); ++i) { + const auto& pre_file = pre_release_metadata[i]; + const auto& post_file = post_release_metadata[i]; + ASSERT_EQ(1, pre_file.level); + ASSERT_EQ(1, post_file.level); + // each file is the same as before, with deletion markers/deleted keys.
+ ASSERT_EQ(post_file.size, pre_file.size); + } +} + TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) { const int kNumKeysPerFile = 32; const int kNumLevelFiles = 2; @@ -3457,21 +3792,22 @@ options.compression = kNoCompression; options.ttl = 24 * 60 * 60; // 24 hours options.max_open_files = -1; - env_->time_elapse_only_sleep_ = false; + env_->SetMockSleep(); options.env = env_; - env_->addon_time_.store(0); + // NOTE: Presumed unnecessary and removed: resetting mock time in env + DestroyAndReopen(options); Random rnd(301); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); MoveFilesToLevel(3); ASSERT_EQ("0,0,0,2", FilesPerLevel()); @@ -3480,44 +3816,45 @@ for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j))); } - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("2,0,0,2", FilesPerLevel()); MoveFilesToLevel(1); ASSERT_EQ("0,2,0,2", FilesPerLevel()); - env_->addon_time_.fetch_add(36 * 60 * 60); // 36 hours + env_->MockSleepForSeconds(36 * 60 * 60); // 36 hours ASSERT_EQ("0,2,0,2", FilesPerLevel()); // Just do a simple write + flush so that the Ttl expired files get // compacted. ASSERT_OK(Put("a", "1")); - Flush(); + ASSERT_OK(Flush()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); ASSERT_TRUE(compaction->compaction_reason() == CompactionReason::kTtl); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // All non-L0 files are deleted, as they contained only deleted data. ASSERT_EQ("1", FilesPerLevel()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); // Test dynamically changing ttl. - env_->addon_time_.store(0); + // NOTE: Presumed unnecessary and removed: resetting mock time in env + DestroyAndReopen(options); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); MoveFilesToLevel(3); ASSERT_EQ("0,0,0,2", FilesPerLevel()); @@ -3526,19 +3863,19 @@ for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK(Delete(Key(i * kNumKeysPerFile + j))); } - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("2,0,0,2", FilesPerLevel()); MoveFilesToLevel(1); ASSERT_EQ("0,2,0,2", FilesPerLevel()); // Move time forward by 12 hours, and make sure that compaction still doesn't // trigger as ttl is set to 24 hours. - env_->addon_time_.fetch_add(12 * 60 * 60); + env_->MockSleepForSeconds(12 * 60 * 60); ASSERT_OK(Put("a", "1")); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("1,2,0,2", FilesPerLevel()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( @@ -3551,13 +3888,14 @@ // Dynamically change ttl to 10 hours. 
// This should trigger a ttl compaction, as 12 hours have already passed. ASSERT_OK(dbfull()->SetOptions({{"ttl", "36000"}})); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // All non-L0 files are deleted, as they contained only deleted data. ASSERT_EQ("1", FilesPerLevel()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_F(DBCompactionTest, LevelTtlCascadingCompactions) { + env_->SetMockSleep(); const int kValueSize = 100; for (bool if_restart : {false, true}) { @@ -3588,10 +3926,10 @@ } }); - env_->time_elapse_only_sleep_ = false; options.env = env_; - env_->addon_time_.store(0); + // NOTE: Presumed unnecessary and removed: resetting mock time in env + DestroyAndReopen(options); int ttl_compactions = 0; @@ -3608,9 +3946,9 @@ // Add two L6 files with key ranges: [1 .. 100], [101 .. 200]. Random rnd(301); for (int i = 1; i <= 100; ++i) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); + ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); // Get the first file's creation time. This will be the oldest file in the // DB. Compactions involving this file's descendants should keep getting // this time. std::vector<std::vector<FileMetaData>> level_to_files; dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), &level_to_files); uint64_t oldest_time = level_to_files[0][0].oldest_ancester_time; // Add 1 hour and do another flush. - env_->addon_time_.fetch_add(1 * 60 * 60); + env_->MockSleepForSeconds(1 * 60 * 60); for (int i = 101; i <= 200; ++i) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); + ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); MoveFilesToLevel(6); ASSERT_EQ("0,0,0,0,0,0,2", FilesPerLevel()); - env_->addon_time_.fetch_add(1 * 60 * 60); + env_->MockSleepForSeconds(1 * 60 * 60); // Add two L4 files with key ranges: [1 .. 50], [51 .. 150]. for (int i = 1; i <= 50; ++i) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); + ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); } - Flush(); - env_->addon_time_.fetch_add(1 * 60 * 60); + ASSERT_OK(Flush()); + env_->MockSleepForSeconds(1 * 60 * 60); for (int i = 51; i <= 150; ++i) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); + ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); MoveFilesToLevel(4); ASSERT_EQ("0,0,0,0,2,0,2", FilesPerLevel()); - env_->addon_time_.fetch_add(1 * 60 * 60); + env_->MockSleepForSeconds(1 * 60 * 60); // Add one L1 file with key range: [26, 75]. for (int i = 26; i <= 75; ++i) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, kValueSize))); + ASSERT_OK(Put(Key(i), rnd.RandomString(kValueSize))); } - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); MoveFilesToLevel(1); ASSERT_EQ("0,1,0,0,2,0,2", FilesPerLevel()); @@ -3671,15 +4009,15 @@ // 4. A TTL compaction happens between L5 and L6 files. Output in L6.
// Add 25 hours and do a write - env_->addon_time_.fetch_add(25 * 60 * 60); + env_->MockSleepForSeconds(25 * 60 * 60); ASSERT_OK(Put(Key(1), "1")); if (if_restart) { Reopen(options); } else { - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel()); ASSERT_EQ(5, ttl_compactions); @@ -3687,14 +4025,14 @@ dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), &level_to_files); ASSERT_EQ(oldest_time, level_to_files[6][0].oldest_ancester_time); - env_->addon_time_.fetch_add(25 * 60 * 60); + env_->MockSleepForSeconds(25 * 60 * 60); ASSERT_OK(Put(Key(2), "1")); if (if_restart) { Reopen(options); } else { - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("1,0,0,0,0,0,1", FilesPerLevel()); ASSERT_GE(ttl_compactions, 6); @@ -3704,6 +4042,7 @@ } TEST_F(DBCompactionTest, LevelPeriodicCompaction) { + env_->SetMockSleep(); const int kNumKeysPerFile = 32; const int kNumLevelFiles = 2; const int kValueSize = 100; @@ -3735,10 +4074,10 @@ } }); - env_->time_elapse_only_sleep_ = false; options.env = env_; - env_->addon_time_.store(0); + // NOTE: Presumed unnecessary and removed: resetting mock time in env + DestroyAndReopen(options); int periodic_compactions = 0; @@ -3755,21 +4094,21 @@ Random rnd(301); for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), - RandomString(&rnd, kValueSize))); + ASSERT_OK( + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("2", FilesPerLevel()); ASSERT_EQ(0, periodic_compactions); // Add 50 hours and do a write - env_->addon_time_.fetch_add(50 * 60 * 60); + env_->MockSleepForSeconds(50 * 60 * 60); ASSERT_OK(Put("a", "1")); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Assert that the files stay in the same level ASSERT_EQ("3", FilesPerLevel()); // The two old files go through the periodic compaction process @@ -3779,24 +4118,24 @@ ASSERT_EQ("0,3", FilesPerLevel()); // Add another 50 hours and do another write - env_->addon_time_.fetch_add(50 * 60 * 60); + env_->MockSleepForSeconds(50 * 60 * 60); ASSERT_OK(Put("b", "2")); if (if_restart) { Reopen(options); } else { - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("1,3", FilesPerLevel()); // The three old files now go through the periodic compaction process. 2 // + 3. ASSERT_EQ(5, periodic_compactions); // Add another 50 hours and do another write - env_->addon_time_.fetch_add(50 * 60 * 60); + env_->MockSleepForSeconds(50 * 60 * 60); ASSERT_OK(Put("c", "3")); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("2,3", FilesPerLevel()); // The four old files now go through the periodic compaction process. 5 // + 4.
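These TTL/periodic-compaction hunks all migrate from poking env_->addon_time_ directly to the mock-clock helpers. Condensed from the changes above (test-fixture API, with env_ being the test's special Env):

// Opt the Env into a fake clock: sleeps and time queries now move mock
// time only, so a "50 hour" wait completes instantly.
env_->SetMockSleep();
options.env = env_;
DestroyAndReopen(options);
// ... write and flush some files ...
env_->MockSleepForSeconds(50 * 60 * 60);     // advance the clock 50 hours
ASSERT_OK(Put("a", "1"));                    // a write so compaction re-evaluates
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());  // periodic/TTL picks the old files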
@@ -3817,10 +4156,11 @@ const int kValueSize = 100; Options options = CurrentOptions(); - env_->time_elapse_only_sleep_ = false; + env_->SetMockSleep(); options.env = env_; - env_->addon_time_.store(0); + // NOTE: Presumed unnecessary and removed: resetting mock time in env + DestroyAndReopen(options); int periodic_compactions = 0; @@ -3850,9 +4190,9 @@ for (int i = 0; i < kNumFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); // Move the first two files to L2. if (i == 1) { MoveFilesToLevel(2); @@ -3868,7 +4208,7 @@ set_file_creation_time_to_zero = false; // Forward the clock by 2 days. - env_->addon_time_.fetch_add(2 * 24 * 60 * 60); + env_->MockSleepForSeconds(2 * 24 * 60 * 60); options.periodic_compaction_seconds = 1 * 24 * 60 * 60; // 1 day Reopen(options); @@ -3889,10 +4229,11 @@ options.ttl = 10 * 60 * 60; // 10 hours options.periodic_compaction_seconds = 48 * 60 * 60; // 2 days options.max_open_files = -1; // needed for both periodic and ttl compactions - env_->time_elapse_only_sleep_ = false; + env_->SetMockSleep(); options.env = env_; - env_->addon_time_.store(0); + // NOTE: Presumed unnecessary and removed: resetting mock time in env + DestroyAndReopen(options); int periodic_compactions = 0; @@ -3913,11 +4254,11 @@ for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); MoveFilesToLevel(3); @@ -3926,20 +4267,20 @@ ASSERT_EQ(0, ttl_compactions); // Add some time greater than periodic_compaction_time. - env_->addon_time_.fetch_add(50 * 60 * 60); + env_->MockSleepForSeconds(50 * 60 * 60); ASSERT_OK(Put("a", "1")); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Files in the bottom level go through periodic compactions. ASSERT_EQ("1,0,0,2", FilesPerLevel()); ASSERT_EQ(2, periodic_compactions); ASSERT_EQ(0, ttl_compactions); // Add a little more time than ttl - env_->addon_time_.fetch_add(11 * 60 * 60); + env_->MockSleepForSeconds(11 * 60 * 60); ASSERT_OK(Put("b", "1")); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Notice that the previous file in level 1 falls down to the bottom level // due to ttl compactions, one level at a time. // And bottom level files don't get picked up for ttl compactions. @@ -3948,10 +4289,10 @@ ASSERT_EQ(3, ttl_compactions); // Add some time greater than periodic_compaction_time. - env_->addon_time_.fetch_add(50 * 60 * 60); + env_->MockSleepForSeconds(50 * 60 * 60); ASSERT_OK(Put("c", "1")); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Previous L0 file falls one level at a time to bottom level due to ttl. // And all 4 bottom files go through periodic compactions. 
ASSERT_EQ("1,0,0,4", FilesPerLevel()); @@ -3961,6 +4302,67 @@ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(DBCompactionTest, LevelTtlBooster) { + const int kNumKeysPerFile = 32; + const int kNumLevelFiles = 3; + const int kValueSize = 1000; + + Options options = CurrentOptions(); + options.ttl = 10 * 60 * 60; // 10 hours + options.periodic_compaction_seconds = 480 * 60 * 60; // very long + options.level0_file_num_compaction_trigger = 2; + options.max_bytes_for_level_base = 5 * uint64_t{kNumKeysPerFile * kValueSize}; + options.max_open_files = -1; // needed for both periodic and ttl compactions + options.compaction_pri = CompactionPri::kMinOverlappingRatio; + env_->SetMockSleep(); + options.env = env_; + + // NOTE: Presumed unnecessary and removed: resetting mock time in env + + DestroyAndReopen(options); + + Random rnd(301); + for (int i = 0; i < kNumLevelFiles; ++i) { + for (int j = 0; j < kNumKeysPerFile; ++j) { + ASSERT_OK( + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + MoveFilesToLevel(2); + + ASSERT_EQ("0,0,3", FilesPerLevel()); + + // Create some files for L1 + for (int i = 0; i < 2; i++) { + for (int j = 0; j < kNumKeysPerFile; ++j) { + ASSERT_OK(Put(Key(2 * j + i), rnd.RandomString(kValueSize))); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + } + + ASSERT_EQ("0,1,3", FilesPerLevel()); + + // Make the new L0 files qualify TTL boosting and generate one more to trigger + // L1 -> L2 compaction. Old files will be picked even if their priority is + // lower without boosting. + env_->MockSleepForSeconds(8 * 60 * 60); + for (int i = 0; i < 2; i++) { + for (int j = 0; j < kNumKeysPerFile; ++j) { + ASSERT_OK(Put(Key(kNumKeysPerFile * 2 + 2 * j + i), + rnd.RandomString(kValueSize * 2))); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + } + ASSERT_EQ("0,1,2", FilesPerLevel()); + + ASSERT_GT(SizeAtLevel(1), kNumKeysPerFile * 4 * kValueSize); +} + TEST_F(DBCompactionTest, LevelPeriodicCompactionWithCompactionFilters) { class TestCompactionFilter : public CompactionFilter { const char* Name() const override { return "TestCompactionFilter"; } @@ -3981,9 +4383,10 @@ Options options = CurrentOptions(); TestCompactionFilter test_compaction_filter; - env_->time_elapse_only_sleep_ = false; + env_->SetMockSleep(); options.env = env_; - env_->addon_time_.store(0); + + // NOTE: Presumed unnecessary and removed: resetting mock time in env enum CompactionFilterType { kUseCompactionFilter, @@ -4024,20 +4427,20 @@ for (int i = 0; i < kNumLevelFiles; ++i) { for (int j = 0; j < kNumKeysPerFile; ++j) { ASSERT_OK( - Put(Key(i * kNumKeysPerFile + j), RandomString(&rnd, kValueSize))); + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); } - Flush(); + ASSERT_OK(Flush()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("2", FilesPerLevel()); ASSERT_EQ(0, periodic_compactions); // Add 31 days and do a write - env_->addon_time_.fetch_add(31 * 24 * 60 * 60); + env_->MockSleepForSeconds(31 * 24 * 60 * 60); ASSERT_OK(Put("a", "1")); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); // Assert that the files stay in the same level ASSERT_EQ("3", FilesPerLevel()); // The two old files go through the periodic compaction process @@ -4084,18 +4487,18 @@ Random rnd(301); for (int j = 0; j < kNumL0FilesLimit - 1; ++j) { for 
(int k = 0; k < 2; ++k) { - ASSERT_OK(Put(Key(k), RandomString(&rnd, 1024))); + ASSERT_OK(Put(Key(k), rnd.RandomString(1024))); } - Flush(); + ASSERT_OK(Flush()); } auto manual_compaction_thread = port::Thread([this]() { CompactRangeOptions cro; cro.allow_write_stall = false; - db_->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); }); manual_compaction_thread.join(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(1), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); @@ -4138,21 +4541,21 @@ Random rnd(301); for (int j = 0; j < kNumImmMemTableLimit - 1; ++j) { - ASSERT_OK(Put(Key(0), RandomString(&rnd, 1024))); + ASSERT_OK(Put(Key(0), rnd.RandomString(1024))); FlushOptions flush_opts; flush_opts.wait = false; flush_opts.allow_write_stall = true; - dbfull()->Flush(flush_opts); + ASSERT_OK(dbfull()->Flush(flush_opts)); } auto manual_compaction_thread = port::Thread([this]() { CompactRangeOptions cro; cro.allow_write_stall = false; - db_->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); }); manual_compaction_thread.join(); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ(0, NumTableFilesAtLevel(0)); ASSERT_GT(NumTableFilesAtLevel(1), 0); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); @@ -4186,14 +4589,13 @@ Random rnd(301); for (int j = 0; j < kNumL0FilesLimit - 1; ++j) { for (int k = 0; k < 2; ++k) { - ASSERT_OK(Put(1, Key(k), RandomString(&rnd, 1024))); + ASSERT_OK(Put(1, Key(k), rnd.RandomString(1024))); } - Flush(1); + ASSERT_OK(Flush(1)); } auto manual_compaction_thread = port::Thread([this, i]() { CompactRangeOptions cro; cro.allow_write_stall = false; - Status s = db_->CompactRange(cro, handles_[1], nullptr, nullptr); if (i == 0) { ASSERT_TRUE(db_->CompactRange(cro, handles_[1], nullptr, nullptr) .IsColumnFamilyDropped()); @@ -4213,7 +4615,7 @@ manual_compaction_thread.join(); TEST_SYNC_POINT( "DBCompactionTest::CompactRangeShutdownWhileDelayed:PostManual"); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } } @@ -4246,27 +4648,28 @@ flush_opts.allow_write_stall = true; for (int i = 0; i < kNumL0FilesLimit - 1; ++i) { for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(Key(j), RandomString(&rnd, 1024))); + ASSERT_OK(Put(Key(j), rnd.RandomString(1024))); } - dbfull()->Flush(flush_opts); + ASSERT_OK(dbfull()->Flush(flush_opts)); } auto manual_compaction_thread = port::Thread([this]() { CompactRangeOptions cro; cro.allow_write_stall = false; - db_->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); }); TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush"); - Put(ToString(0), RandomString(&rnd, 1024)); - dbfull()->Flush(flush_opts); - Put(ToString(0), RandomString(&rnd, 1024)); + ASSERT_OK(Put(ToString(0), rnd.RandomString(1024))); + ASSERT_OK(dbfull()->Flush(flush_opts)); + ASSERT_OK(Put(ToString(0), rnd.RandomString(1024))); TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush"); manual_compaction_thread.join(); // If CompactRange's flush was skipped, the final Put above will still be // in the active memtable. 
std::string num_keys_in_memtable; - db_->GetProperty(DB::Properties::kNumEntriesActiveMemTable, &num_keys_in_memtable); + ASSERT_TRUE(db_->GetProperty(DB::Properties::kNumEntriesActiveMemTable, + &num_keys_in_memtable)); ASSERT_EQ(ToString(1), num_keys_in_memtable); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); @@ -4324,7 +4727,7 @@ } else { ASSERT_EQ(2, num_memtable_entries); // flush anyways to prepare for next iteration - db_->Flush(FlushOptions()); + ASSERT_OK(db_->Flush(FlushOptions())); } } } @@ -4339,12 +4742,12 @@ for (int i = 0; i < 32; i++) { for (int j = 0; j < 5000; j++) { - Put(std::to_string(j), std::string(1, 'A')); + ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); } ASSERT_OK(Flush()); ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ColumnFamilyHandleImpl* cfh = static_cast(dbfull()->DefaultColumnFamily()); ColumnFamilyData* cfd = cfh->cfd(); @@ -4429,7 +4832,7 @@ ASSERT_OK(Delete("b")); ASSERT_OK(Flush()); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(1), 0); @@ -4476,7 +4879,8 @@ options.level0_slowdown_writes_trigger = 64; options.level0_stop_writes_trigger = 64; options.max_background_jobs = kMaxBackgroundThreads; // Enough threads - options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); + options.memtable_factory.reset( + test::NewSpecialSkipListFactory(kNumKeysPerFile)); options.max_write_buffer_number = 10; // Enough memtables DestroyAndReopen(options); @@ -4562,7 +4966,7 @@ } for (unsigned int cf = 0; cf < cf_count; cf++) { - dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); } } @@ -4580,7 +4984,7 @@ } // put extra key to trigger flush ASSERT_OK(Put(0, "", "")); - dbfull()->TEST_WaitForFlushMemTable(handles_[0]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0])); ASSERT_EQ(options.level0_file_num_compaction_trigger + num + 1, NumTableFilesAtLevel(0, 0)); } @@ -4595,7 +4999,7 @@ } for (unsigned int cf = 0; cf < cf_count; cf++) { - dbfull()->TEST_WaitForFlushMemTable(handles_[cf]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); } ASSERT_OK(dbfull()->TEST_WaitForCompact()); @@ -4617,7 +5021,7 @@ // put extra key to trigger flush ASSERT_OK(Put(cf_test, "", "")); - dbfull()->TEST_WaitForFlushMemTable(handles_[cf_test]); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf_test])); ASSERT_EQ(1, NumTableFilesAtLevel(0, cf_test)); Compact(cf_test, Key(0), Key(keyIndex)); @@ -4636,7 +5040,7 @@ options.create_if_missing = true; options.disable_auto_compactions = true; options.use_direct_io_for_flush_and_compaction = GetParam(); - options.env = new MockEnv(Env::Default()); + options.env = MockEnv::Create(Env::Default()); Reopen(options); bool readahead = false; SyncPoint::GetInstance()->SetCallBack( @@ -4655,7 +5059,7 @@ CreateAndReopenWithCF({"pikachu"}, options); MakeTables(3, "p", "q", 1); ASSERT_EQ("1,1,1", FilesPerLevel(1)); - Compact(1, "p1", "p9"); + Compact(1, "p", "q"); ASSERT_EQ(readahead, options.use_direct_reads); ASSERT_EQ("0,0,1", FilesPerLevel(1)); Destroy(options); @@ -4668,7 +5072,8 @@ class CompactionPriTest : public DBTestBase, public testing::WithParamInterface { public: - CompactionPriTest() : DBTestBase("/compaction_pri_test") { + CompactionPriTest() + : DBTestBase("compaction_pri_test", /*env_do_fsync=*/true) { 
compaction_pri_ = GetParam(); } @@ -4696,13 +5101,13 @@ for (int i = 0; i < kNKeys; i++) { keys[i] = i; } - std::random_shuffle(std::begin(keys), std::end(keys)); + RandomShuffle(std::begin(keys), std::end(keys), rnd.Next()); for (int i = 0; i < kNKeys; i++) { - ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 102))); + ASSERT_OK(Put(Key(keys[i]), rnd.RandomString(102))); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); for (int i = 0; i < kNKeys; i++) { ASSERT_NE("NOT_FOUND", Get(Key(i))); } @@ -4741,9 +5146,9 @@ Random rnd(301); for (auto i = 0; i < 8; ++i) { for (auto j = 0; j < 10; ++j) { - Merge("foo", RandomString(&rnd, 1024)); + ASSERT_OK(Merge("foo", rnd.RandomString(1024))); } - Flush(); + ASSERT_OK(Flush()); } MoveFilesToLevel(2); @@ -4756,7 +5161,7 @@ CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - dbfull()->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); } TEST_F(DBCompactionTest, ManualCompactionFailsInReadOnlyMode) { @@ -4764,7 +5169,7 @@ // is in read-only mode. Verify it now at least returns, despite failing. const int kNumL0Files = 4; std::unique_ptr mock_env( - new FaultInjectionTestEnv(Env::Default())); + new FaultInjectionTestEnv(env_)); Options opts = CurrentOptions(); opts.disable_auto_compactions = true; opts.env = mock_env.get(); @@ -4773,9 +5178,9 @@ Random rnd(301); for (int i = 0; i < kNumL0Files; ++i) { // Make sure files are overlapping in key-range to prevent trivial move. - Put("key1", RandomString(&rnd, 1024)); - Put("key2", RandomString(&rnd, 1024)); - Flush(); + ASSERT_OK(Put("key1", rnd.RandomString(1024))); + ASSERT_OK(Put("key2", rnd.RandomString(1024))); + ASSERT_OK(Flush()); } ASSERT_EQ(kNumL0Files, NumTableFilesAtLevel(0)); @@ -4783,7 +5188,7 @@ mock_env->SetFilesystemActive(false); // Make sure this is outside `CompactRange`'s range so that it doesn't fail // early trying to flush memtable. 
- ASSERT_NOK(Put("key3", RandomString(&rnd, 1024))); + ASSERT_NOK(Put("key3", rnd.RandomString(1024))); // In the bug scenario, the first manual compaction would fail and forget to // unregister itself, causing the second one to hang forever due to conflict @@ -4822,9 +5227,9 @@ for (auto i = 0; i < 8; ++i) { for (auto j = 0; j < 10; ++j) { ASSERT_OK( - Put("foo" + std::to_string(i * 10 + j), RandomString(&rnd, 1024))); + Put("foo" + std::to_string(i * 10 + j), rnd.RandomString(1024))); } - Flush(); + ASSERT_OK(Flush()); } MoveFilesToLevel(2); @@ -4832,9 +5237,9 @@ for (auto i = 0; i < 8; ++i) { for (auto j = 0; j < 10; ++j) { ASSERT_OK( - Put("bar" + std::to_string(i * 10 + j), RandomString(&rnd, 1024))); + Put("bar" + std::to_string(i * 10 + j), rnd.RandomString(1024))); } - Flush(); + ASSERT_OK(Flush()); } const std::vector& comp_stats = internal_stats_ptr->TEST_GetCompactionStats(); @@ -4843,7 +5248,7 @@ CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized; - dbfull()->CompactRange(cro, nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); const std::vector& comp_stats2 = internal_stats_ptr->TEST_GetCompactionStats(); @@ -4851,6 +5256,97 @@ ASSERT_EQ(num, 0); } +TEST_F(DBCompactionTest, ManualCompactionMax) { + uint64_t l1_avg_size = 0, l2_avg_size = 0; + auto generate_sst_func = [&]() { + Random rnd(301); + for (auto i = 0; i < 100; i++) { + for (auto j = 0; j < 10; j++) { + ASSERT_OK(Put(Key(i * 10 + j), rnd.RandomString(1024))); + } + ASSERT_OK(Flush()); + } + MoveFilesToLevel(2); + + for (auto i = 0; i < 10; i++) { + for (auto j = 0; j < 10; j++) { + ASSERT_OK(Put(Key(i * 100 + j * 10), rnd.RandomString(1024))); + } + ASSERT_OK(Flush()); + } + MoveFilesToLevel(1); + + std::vector> level_to_files; + dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), + &level_to_files); + + uint64_t total = 0; + for (const auto& file : level_to_files[1]) { + total += file.compensated_file_size; + } + l1_avg_size = total / level_to_files[1].size(); + + total = 0; + for (const auto& file : level_to_files[2]) { + total += file.compensated_file_size; + } + l2_avg_size = total / level_to_files[2].size(); + }; + + std::atomic_int num_compactions(0); + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BGWorkCompaction", [&](void* /*arg*/) { ++num_compactions; }); + SyncPoint::GetInstance()->EnableProcessing(); + + Options opts = CurrentOptions(); + opts.disable_auto_compactions = true; + + // with default setting (1.6G by default), it should cover all files in 1 + // compaction + DestroyAndReopen(opts); + generate_sst_func(); + num_compactions.store(0); + CompactRangeOptions cro; + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + ASSERT_TRUE(num_compactions.load() == 1); + + // split the compaction to 5 + int num_split = 5; + DestroyAndReopen(opts); + generate_sst_func(); + uint64_t total_size = (l1_avg_size * 10) + (l2_avg_size * 100); + opts.max_compaction_bytes = total_size / num_split; + opts.target_file_size_base = total_size / num_split; + Reopen(opts); + num_compactions.store(0); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + ASSERT_TRUE(num_compactions.load() == num_split); + + // very small max_compaction_bytes, it should still move forward + opts.max_compaction_bytes = l1_avg_size / 2; + opts.target_file_size_base = l1_avg_size / 2; + DestroyAndReopen(opts); + generate_sst_func(); + num_compactions.store(0); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + 
ASSERT_TRUE(num_compactions.load() > 10); + + // dynamically set the option + num_split = 2; + opts.max_compaction_bytes = 0; + DestroyAndReopen(opts); + generate_sst_func(); + total_size = (l1_avg_size * 10) + (l2_avg_size * 100); + Status s = db_->SetOptions( + {{"max_compaction_bytes", std::to_string(total_size / num_split)}, + {"target_file_size_base", std::to_string(total_size / num_split)}}); + ASSERT_OK(s); + + num_compactions.store(0); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + ASSERT_TRUE(num_compactions.load() == num_split); +} + TEST_F(DBCompactionTest, CompactionDuringShutdown) { Options opts = CurrentOptions(); opts.level0_file_num_compaction_trigger = 2; @@ -4866,16 +5362,17 @@ for (auto i = 0; i < 2; ++i) { for (auto j = 0; j < 10; ++j) { ASSERT_OK( - Put("foo" + std::to_string(i * 10 + j), RandomString(&rnd, 1024))); + Put("foo" + std::to_string(i * 10 + j), rnd.RandomString(1024))); } - Flush(); + ASSERT_OK(Flush()); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", [&](void* /*arg*/) { dbfull()->shutting_down_.store(true); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + Status s = dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_TRUE(s.ok() || s.IsShutdownInProgress()); ASSERT_OK(dbfull()->error_handler_.GetBGError()); } @@ -4889,7 +5386,7 @@ // Generate an external SST file containing a single key, i.e. 99 std::string sst_files_dir = dbname_ + "/sst_files/"; - test::DestroyDir(env_, sst_files_dir); + ASSERT_OK(DestroyDir(env_, sst_files_dir)); ASSERT_OK(env_->CreateDir(sst_files_dir)); SstFileWriter sst_writer(EnvOptions(), options); const std::string sst_file_path = sst_files_dir + "test.sst"; @@ -4909,14 +5406,15 @@ options.level0_file_num_compaction_trigger = options.level0_stop_writes_trigger; options.max_subcompactions = max_subcompactions_; - options.memtable_factory.reset(new SpecialSkipListFactory(kNumKeysPerFile)); + options.memtable_factory.reset( + test::NewSpecialSkipListFactory(kNumKeysPerFile)); DestroyAndReopen(options); Random rnd(301); // Generate level0_stop_writes_trigger L0 files to trigger write stop for (int i = 0; i != options.level0_file_num_compaction_trigger; ++i) { for (int j = 0; j != kNumKeysPerFile; ++j) { - ASSERT_OK(Put(Key(j), RandomString(&rnd, 990))); + ASSERT_OK(Put(Key(j), rnd.RandomString(990))); } if (0 == i) { // When we reach here, the memtables have kNumKeysPerFile keys. Note that @@ -4928,7 +5426,7 @@ // extra key to trigger flush. 
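// A minimal sketch of the memtable trick used in the hunk above, assuming
// the behavior of the RocksDB test utilities: NewSpecialSkipListFactory(N)
// returns a skip-list memtable factory whose memtables report themselves
// full after N entries, so every N puts force a flush and thus one L0 file.
Options sketch_opts = CurrentOptions();
sketch_opts.memtable_factory.reset(
    test::NewSpecialSkipListFactory(kNumKeysPerFile));
DestroyAndReopen(sketch_opts);  // kNumKeysPerFile puts now yield one L0 file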
ASSERT_OK(Put("", "")); } - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ(NumTableFilesAtLevel(0 /*level*/, 0 /*cf*/), i + 1); } // When we reach this point, there will be level0_stop_writes_trigger L0 @@ -4958,10 +5456,11 @@ TEST_F(DBCompactionTest, ConsistencyFailTest) { Options options = CurrentOptions(); + options.force_consistency_checks = true; DestroyAndReopen(options); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "VersionBuilder::CheckConsistency", [&](void* arg) { + "VersionBuilder::CheckConsistency0", [&](void* arg) { auto p = reinterpret_cast*>(arg); // just swap the two FileMetaData so that we hit error @@ -4975,11 +5474,59 @@ for (int k = 0; k < 2; ++k) { ASSERT_OK(Put("foo", "bar")); - Flush(); + Status s = Flush(); + if (k < 1) { + ASSERT_OK(s); + } else { + ASSERT_TRUE(s.IsCorruption()); + } } ASSERT_NOK(Put("foo", "bar")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(DBCompactionTest, ConsistencyFailTest2) { + Options options = CurrentOptions(); + options.force_consistency_checks = true; + options.target_file_size_base = 1000; + options.level0_file_num_compaction_trigger = 2; + BlockBasedTableOptions bbto; + bbto.block_size = 400; // small block size + options.table_factory.reset(NewBlockBasedTableFactory(bbto)); + DestroyAndReopen(options); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "VersionBuilder::CheckConsistency1", [&](void* arg) { + auto p = + reinterpret_cast*>(arg); + // just swap the two FileMetaData so that we hit error + // in CheckConsistency funcion + FileMetaData* temp = *(p->first); + *(p->first) = *(p->second); + *(p->second) = temp; + }); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + Random rnd(301); + std::string value = rnd.RandomString(1000); + + ASSERT_OK(Put("foo1", value)); + ASSERT_OK(Put("z", "")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("foo2", value)); + ASSERT_OK(Put("z", "")); + Status s = Flush(); + ASSERT_TRUE(s.ok() || s.IsCorruption()); + + // This probably returns non-OK, but we rely on the next Put() + // to determine the DB is frozen. + ASSERT_NOK(dbfull()->TEST_WaitForCompact()); + ASSERT_NOK(Put("foo", "bar")); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); } void IngestOneKeyValue(DBImpl* db, const std::string& key, @@ -5012,10 +5559,16 @@ const size_t kValueSize = 1 << 20; Random rnd(301); std::atomic pick_intra_l0_count(0); - std::string value(RandomString(&rnd, kValueSize)); + std::string value(rnd.RandomString(kValueSize)); + // The L0->L1 must be picked before we begin ingesting files to trigger + // intra-L0 compaction, and must not finish until after an intra-L0 + // compaction has been picked. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBCompactionTestWithParam::FlushAfterIntraL0:1", + {{"LevelCompactionPicker::PickCompaction:Return", + "DBCompactionTestWithParam::" + "FlushAfterIntraL0CompactionCheckConsistencyFail:L0ToL1Ready"}, + {"LevelCompactionPicker::PickCompactionBySize:0", "CompactionJob::Run():Start"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FindIntraL0Compaction", @@ -5043,19 +5596,20 @@ ASSERT_OK(Put(Key(0), "a")); ASSERT_EQ(5, NumTableFilesAtLevel(0)); + TEST_SYNC_POINT( + "DBCompactionTestWithParam::" + "FlushAfterIntraL0CompactionCheckConsistencyFail:L0ToL1Ready"); // Ingest 5 L0 sst. 
And these files would trigger PickIntraL0Compaction. for (int i = 5; i < 10; i++) { + ASSERT_EQ(i, NumTableFilesAtLevel(0)); IngestOneKeyValue(dbfull(), Key(i), value, options); - ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); } - TEST_SYNC_POINT("DBCompactionTestWithParam::FlushAfterIntraL0:1"); // Put one key, to make the biggest log sequence number in this memtable // bigger than the sst which will be ingested in the next step. ASSERT_OK(Put(Key(2), "b")); - ASSERT_EQ(10, NumTableFilesAtLevel(0)); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); std::vector<std::vector<FileMetaData>> level_to_files; dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), @@ -5080,8 +5634,8 @@ const size_t kValueSize = 1 << 20; Random rnd(301); - std::string value(RandomString(&rnd, kValueSize)); - std::string value2(RandomString(&rnd, kValueSize)); + std::string value(rnd.RandomString(kValueSize)); + std::string value2(rnd.RandomString(kValueSize)); std::string bigvalue = value + value; // prevents trivial move @@ -5093,8 +5647,14 @@ ASSERT_EQ(0, NumTableFilesAtLevel(0)); std::atomic<int> pick_intra_l0_count(0); + // The L0->L1 must be picked before we begin ingesting files to trigger + // intra-L0 compaction, and must not finish until after an intra-L0 + // compaction has been picked. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"DBCompactionTestWithParam::IntraL0CompactionAfterFlush:1", + {{"LevelCompactionPicker::PickCompaction:Return", + "DBCompactionTestWithParam::" + "IntraL0CompactionAfterFlushCheckConsistencyFail:L0ToL1Ready"}, + {"LevelCompactionPicker::PickCompactionBySize:0", "CompactionJob::Run():Start"}}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FindIntraL0Compaction", @@ -5125,18 +5685,19 @@ } ASSERT_EQ(6, NumTableFilesAtLevel(0)); + TEST_SYNC_POINT( + "DBCompactionTestWithParam::" + "IntraL0CompactionAfterFlushCheckConsistencyFail:L0ToL1Ready"); // ingest file to trigger IntraL0Compaction for (int i = 6; i < 10; ++i) { ASSERT_EQ(i, NumTableFilesAtLevel(0)); IngestOneKeyValue(dbfull(), Key(i), value2, options); } - ASSERT_EQ(10, NumTableFilesAtLevel(0)); // Wake up flush job sleeping_tasks.WakeUp(); sleeping_tasks.WaitUntilDone(); - TEST_SYNC_POINT("DBCompactionTestWithParam::IntraL0CompactionAfterFlush:1"); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); uint64_t error_count = 0; @@ -5151,7 +5712,1668 @@ } } -#endif // !defined(ROCKSDB_LITE) +TEST_P(DBCompactionTestWithBottommostParam, SequenceKeysManualCompaction) { + constexpr int kSstNum = 10; + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + DestroyAndReopen(options); + + // Generate some sst files on level 0 with sequence keys (no overlap) + for (int i = 0; i < kSstNum; i++) { + for (int j = 1; j < UCHAR_MAX; j++) { + auto key = std::string(kSstNum, '\0'); + key[kSstNum - i] += static_cast<char>(j); + ASSERT_OK(Put(key, std::string(i % 1000, 'A'))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + ASSERT_EQ(ToString(kSstNum), FilesPerLevel(0)); + + auto cro = CompactRangeOptions(); + cro.bottommost_level_compaction = bottommost_level_compaction_; + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + if (bottommost_level_compaction_ == BottommostLevelCompaction::kForce || + bottommost_level_compaction_ == + BottommostLevelCompaction::kForceOptimized) { + // Real
compaction to compact all sst files from level 0 to 1 file on level + 1 + ASSERT_EQ("0,1", FilesPerLevel(0)); + } else { + // Just trivial move from level 0 -> 1 + ASSERT_EQ("0," + ToString(kSstNum), FilesPerLevel(0)); + } +} + +INSTANTIATE_TEST_CASE_P( + DBCompactionTestWithBottommostParam, DBCompactionTestWithBottommostParam, + ::testing::Values(BottommostLevelCompaction::kSkip, + BottommostLevelCompaction::kIfHaveCompactionFilter, + BottommostLevelCompaction::kForce, + BottommostLevelCompaction::kForceOptimized)); + +TEST_F(DBCompactionTest, UpdateLevelSubCompactionTest) { + Options options = CurrentOptions(); + options.max_subcompactions = 10; + options.target_file_size_base = 1 << 10; // 1KB + DestroyAndReopen(options); + + bool has_compaction = false; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { + Compaction* compaction = reinterpret_cast<Compaction*>(arg); + ASSERT_TRUE(compaction->max_subcompactions() == 10); + has_compaction = true; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 10); + // Trigger compaction + for (int i = 0; i < 32; i++) { + for (int j = 0; j < 5000; j++) { + ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_TRUE(has_compaction); + + has_compaction = false; + ASSERT_OK(dbfull()->SetDBOptions({{"max_subcompactions", "2"}})); + ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 2); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { + Compaction* compaction = reinterpret_cast<Compaction*>(arg); + ASSERT_TRUE(compaction->max_subcompactions() == 2); + has_compaction = true; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + // Trigger compaction + for (int i = 0; i < 32; i++) { + for (int j = 0; j < 5000; j++) { + ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_TRUE(has_compaction); +} + +TEST_F(DBCompactionTest, UpdateUniversalSubCompactionTest) { + Options options = CurrentOptions(); + options.max_subcompactions = 10; + options.compaction_style = kCompactionStyleUniversal; + options.target_file_size_base = 1 << 10; // 1KB + DestroyAndReopen(options); + + bool has_compaction = false; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { + Compaction* compaction = reinterpret_cast<Compaction*>(arg); + ASSERT_TRUE(compaction->max_subcompactions() == 10); + has_compaction = true; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + // Trigger compaction + for (int i = 0; i < 32; i++) { + for (int j = 0; j < 5000; j++) { + ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_TRUE(has_compaction); + has_compaction = false; + + ASSERT_OK(dbfull()->SetDBOptions({{"max_subcompactions", "2"}})); + ASSERT_TRUE(dbfull()->GetDBOptions().max_subcompactions == 2); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "UniversalCompactionBuilder::PickCompaction:Return", [&](void* arg) { + Compaction* compaction
= reinterpret_cast<Compaction*>(arg); + ASSERT_TRUE(compaction->max_subcompactions() == 2); + has_compaction = true; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + // Trigger compaction + for (int i = 0; i < 32; i++) { + for (int j = 0; j < 5000; j++) { + ASSERT_OK(Put(std::to_string(j), std::string(1, 'A'))); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_TRUE(has_compaction); +} + +TEST_P(ChangeLevelConflictsWithAuto, TestConflict) { + // A `CompactRange()` may race with an automatic compaction; we need + // to make sure it doesn't corrupt the data. + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 2; + Reopen(options); + + ASSERT_OK(Put("foo", "v1")); + ASSERT_OK(Put("bar", "v1")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + + { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 2; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + } + ASSERT_EQ("0,0,1", FilesPerLevel(0)); + + // Run a query that refits to level 1 while another thread is writing to + // the same level. + SyncPoint::GetInstance()->LoadDependency({ + // The first two dependencies ensure the foreground creates an L0 file + // between the background compaction's L0->L1 and its L1->L2. + { + "DBImpl::CompactRange:BeforeRefit:1", + "AutoCompactionFinished1", + }, + { + "AutoCompactionFinished2", + "DBImpl::CompactRange:BeforeRefit:2", + }, + }); + SyncPoint::GetInstance()->EnableProcessing(); + + std::thread auto_comp([&] { + TEST_SYNC_POINT("AutoCompactionFinished1"); + ASSERT_OK(Put("bar", "v2")); + ASSERT_OK(Put("foo", "v2")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("bar", "v3")); + ASSERT_OK(Put("foo", "v3")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + TEST_SYNC_POINT("AutoCompactionFinished2"); + }); + + { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = GetParam() ? 1 : 0; + // This should return non-OK, but it's more important for the test to + // make sure that the DB is not corrupted. + ASSERT_NOK(dbfull()->CompactRange(cro, nullptr, nullptr)); + } + auto_comp.join(); + // Refitting didn't happen. + SyncPoint::GetInstance()->DisableProcessing(); + + // Write something to the DB just to make sure that the consistency check + // didn't fail and the DB is readable. +} + +INSTANTIATE_TEST_CASE_P(ChangeLevelConflictsWithAuto, + ChangeLevelConflictsWithAuto, testing::Bool()); + +TEST_F(DBCompactionTest, ChangeLevelCompactRangeConflictsWithManual) { + // A `CompactRange()` with `change_level == true` needs to execute its final + // step, `ReFitLevel()`, in isolation. Previously there was a bug where + // refitting could target the same level as an ongoing manual compaction, + // leading to overlapping files in that level. + // + // This test ensures that case is not possible by verifying any manual + // compaction issued during the `ReFitLevel()` phase fails with + // `Status::Incomplete`. + Options options = CurrentOptions(); + options.memtable_factory.reset( + test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); + options.level0_file_num_compaction_trigger = 2; + options.num_levels = 3; + Reopen(options); + + // Set up an LSM with three levels populated.
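// A minimal sketch of the refit pattern exercised below: with
// change_level == true, CompactRange() first compacts the requested range
// and then, as its final ReFitLevel() step, moves the output to target_level.
CompactRangeOptions refit;
refit.change_level = true;
refit.target_level = 2;  // park the compacted range on L2
ASSERT_OK(db_->CompactRange(refit, nullptr, nullptr));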
+ Random rnd(301); + int key_idx = 0; + GenerateNewFile(&rnd, &key_idx); + { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 2; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + } + ASSERT_EQ("0,0,2", FilesPerLevel(0)); + + GenerateNewFile(&rnd, &key_idx); + GenerateNewFile(&rnd, &key_idx); + ASSERT_EQ("1,1,2", FilesPerLevel(0)); + + // The background thread will refit L2->L1 while the + // foreground thread will try to simultaneously compact L0->L1. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ + // The first two dependencies ensure the foreground creates an L0 file + // between the background compaction's L0->L1 and its L1->L2. + { + "DBImpl::RunManualCompaction()::1", + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" + "PutFG", + }, + { + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" + "FlushedFG", + "DBImpl::RunManualCompaction()::2", + }, + // The next two dependencies ensure the foreground invokes + // `CompactRange()` while the background is refitting. The + // foreground's `CompactRange()` is guaranteed to attempt an L0->L1 + // as we set it up with an empty memtable and a new L0 file. + { + "DBImpl::CompactRange:PreRefitLevel", + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" + "CompactFG", + }, + { + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" + "CompactedFG", + "DBImpl::CompactRange:PostRefitLevel", + }, + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + ROCKSDB_NAMESPACE::port::Thread refit_level_thread([&] { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 1; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + }); + + TEST_SYNC_POINT( + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:PutFG"); + // Make sure we have something new to compact in the foreground. + // Note key 1 is carefully chosen as it ensures the file we create here + // overlaps with one of the files being refitted L2->L1 in the background. + // If we chose key 0, the file created here would not overlap. + ASSERT_OK(Put(Key(1), "val")); + ASSERT_OK(Flush()); + TEST_SYNC_POINT( + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:FlushedFG"); + + TEST_SYNC_POINT( + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:CompactFG"); + ASSERT_TRUE(dbfull() + ->CompactRange(CompactRangeOptions(), nullptr, nullptr) + .IsIncomplete()); + TEST_SYNC_POINT( + "DBCompactionTest::ChangeLevelCompactRangeConflictsWithManual:" + "CompactedFG"); + refit_level_thread.join(); +} + +TEST_F(DBCompactionTest, ChangeLevelErrorPathTest) { + // This test is added to ensure that RefitLevel() error paths clear + // internal flags and that subsequent valid RefitLevel() calls + // succeed + Options options = CurrentOptions(); + options.memtable_factory.reset( + test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1)); + options.level0_file_num_compaction_trigger = 2; + options.num_levels = 3; + Reopen(options); + + ASSERT_EQ("", FilesPerLevel(0)); + + // Set up an LSM with three levels populated.
+ Random rnd(301); + int key_idx = 0; + GenerateNewFile(&rnd, &key_idx); + ASSERT_EQ("1", FilesPerLevel(0)); + { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 2; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + } + ASSERT_EQ("0,0,2", FilesPerLevel(0)); + + auto start_idx = key_idx; + GenerateNewFile(&rnd, &key_idx); + GenerateNewFile(&rnd, &key_idx); + auto end_idx = key_idx - 1; + ASSERT_EQ("1,1,2", FilesPerLevel(0)); + + // The next two CompactRange() calls are used to exercise error paths within + // RefitLevel() before triggering a valid RefitLevel() call + + // Trigger a refit to L1 first + { + std::string begin_string = Key(start_idx); + std::string end_string = Key(end_idx); + Slice begin(begin_string); + Slice end(end_string); + + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 1; + ASSERT_OK(dbfull()->CompactRange(cro, &begin, &end)); + } + ASSERT_EQ("0,3,2", FilesPerLevel(0)); + + // Try a refit from L2->L1 - this should fail and exercise error paths in + // RefitLevel() + { + // Select key range that matches the bottommost level (L2) + std::string begin_string = Key(0); + std::string end_string = Key(start_idx - 1); + Slice begin(begin_string); + Slice end(end_string); + + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 1; + ASSERT_NOK(dbfull()->CompactRange(cro, &begin, &end)); + } + ASSERT_EQ("0,3,2", FilesPerLevel(0)); + + // Try a valid Refit request to ensure the path is still working + { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 1; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + } + ASSERT_EQ("0,5", FilesPerLevel(0)); +} + +TEST_F(DBCompactionTest, CompactionWithBlob) { + Options options; + options.env = env_; + options.disable_auto_compactions = true; + + Reopen(options); + + constexpr char first_key[] = "first_key"; + constexpr char second_key[] = "second_key"; + constexpr char first_value[] = "first_value"; + constexpr char second_value[] = "second_value"; + constexpr char third_value[] = "third_value"; + + ASSERT_OK(Put(first_key, first_value)); + ASSERT_OK(Put(second_key, first_value)); + ASSERT_OK(Flush()); + + ASSERT_OK(Put(first_key, second_value)); + ASSERT_OK(Put(second_key, second_value)); + ASSERT_OK(Flush()); + + ASSERT_OK(Put(first_key, third_value)); + ASSERT_OK(Put(second_key, third_value)); + ASSERT_OK(Flush()); + + options.enable_blob_files = true; + + Reopen(options); + + constexpr Slice* begin = nullptr; + constexpr Slice* end = nullptr; + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); + + ASSERT_EQ(Get(first_key), third_value); + ASSERT_EQ(Get(second_key), third_value); + + VersionSet* const versions = dbfull()->GetVersionSet(); + assert(versions); + + ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); + ASSERT_NE(cfd, nullptr); + + Version* const current = cfd->current(); + ASSERT_NE(current, nullptr); + + const VersionStorageInfo* const storage_info = current->storage_info(); + ASSERT_NE(storage_info, nullptr); + + const auto& l1_files = storage_info->LevelFiles(1); + ASSERT_EQ(l1_files.size(), 1); + + const FileMetaData* const table_file = l1_files[0]; + ASSERT_NE(table_file, nullptr); + + const auto& blob_files = storage_info->GetBlobFiles(); + ASSERT_EQ(blob_files.size(), 1); + + const auto& blob_file = blob_files.begin()->second; + ASSERT_NE(blob_file, nullptr); + + ASSERT_EQ(table_file->smallest.user_key(), first_key); + ASSERT_EQ(table_file->largest.user_key(),
second_key); + ASSERT_EQ(table_file->oldest_blob_file_number, + blob_file->GetBlobFileNumber()); + + ASSERT_EQ(blob_file->GetTotalBlobCount(), 2); + + const InternalStats* const internal_stats = cfd->internal_stats(); + ASSERT_NE(internal_stats, nullptr); + + const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); + ASSERT_GE(compaction_stats.size(), 2); + ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0); + ASSERT_EQ(compaction_stats[1].bytes_written, table_file->fd.GetFileSize()); + ASSERT_EQ(compaction_stats[1].bytes_written_blob, + blob_file->GetTotalBlobBytes()); + ASSERT_EQ(compaction_stats[1].num_output_files, 1); + ASSERT_EQ(compaction_stats[1].num_output_files_blob, 1); +} + +class DBCompactionTestBlobError + : public DBCompactionTest, + public testing::WithParamInterface { + public: + DBCompactionTestBlobError() : sync_point_(GetParam()) {} + + std::string sync_point_; +}; + +INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobError, DBCompactionTestBlobError, + ::testing::ValuesIn(std::vector{ + "BlobFileBuilder::WriteBlobToFile:AddRecord", + "BlobFileBuilder::WriteBlobToFile:AppendFooter"})); + +TEST_P(DBCompactionTestBlobError, CompactionError) { + Options options; + options.disable_auto_compactions = true; + options.env = env_; + + Reopen(options); + + constexpr char first_key[] = "first_key"; + constexpr char second_key[] = "second_key"; + constexpr char first_value[] = "first_value"; + constexpr char second_value[] = "second_value"; + constexpr char third_value[] = "third_value"; + + ASSERT_OK(Put(first_key, first_value)); + ASSERT_OK(Put(second_key, first_value)); + ASSERT_OK(Flush()); + + ASSERT_OK(Put(first_key, second_value)); + ASSERT_OK(Put(second_key, second_value)); + ASSERT_OK(Flush()); + + ASSERT_OK(Put(first_key, third_value)); + ASSERT_OK(Put(second_key, third_value)); + ASSERT_OK(Flush()); + + options.enable_blob_files = true; + + Reopen(options); + + SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) { + Status* const s = static_cast(arg); + assert(s); + + (*s) = Status::IOError(sync_point_); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + constexpr Slice* begin = nullptr; + constexpr Slice* end = nullptr; + + ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), begin, end).IsIOError()); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + + VersionSet* const versions = dbfull()->GetVersionSet(); + assert(versions); + + ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); + ASSERT_NE(cfd, nullptr); + + Version* const current = cfd->current(); + ASSERT_NE(current, nullptr); + + const VersionStorageInfo* const storage_info = current->storage_info(); + ASSERT_NE(storage_info, nullptr); + + const auto& l1_files = storage_info->LevelFiles(1); + ASSERT_TRUE(l1_files.empty()); + + const auto& blob_files = storage_info->GetBlobFiles(); + ASSERT_TRUE(blob_files.empty()); + + const InternalStats* const internal_stats = cfd->internal_stats(); + ASSERT_NE(internal_stats, nullptr); + + const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); + ASSERT_GE(compaction_stats.size(), 2); + + if (sync_point_ == "BlobFileBuilder::WriteBlobToFile:AddRecord") { + ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0); + ASSERT_EQ(compaction_stats[1].bytes_written, 0); + ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); + ASSERT_EQ(compaction_stats[1].num_output_files, 0); + ASSERT_EQ(compaction_stats[1].num_output_files_blob, 0); + } else { + // SST file 
writing succeeded; blob file writing failed (during Finish) + ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0); + ASSERT_GT(compaction_stats[1].bytes_written, 0); + ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); + ASSERT_EQ(compaction_stats[1].num_output_files, 1); + ASSERT_EQ(compaction_stats[1].num_output_files_blob, 0); + } +} + +class DBCompactionTestBlobGC + : public DBCompactionTest, + public testing::WithParamInterface> { + public: + DBCompactionTestBlobGC() + : blob_gc_age_cutoff_(std::get<0>(GetParam())), + updated_enable_blob_files_(std::get<1>(GetParam())) {} + + double blob_gc_age_cutoff_; + bool updated_enable_blob_files_; +}; + +INSTANTIATE_TEST_CASE_P(DBCompactionTestBlobGC, DBCompactionTestBlobGC, + ::testing::Combine(::testing::Values(0.0, 0.5, 1.0), + ::testing::Bool())); + +TEST_P(DBCompactionTestBlobGC, CompactionWithBlobGC) { + Options options; + options.env = env_; + options.disable_auto_compactions = true; + options.enable_blob_files = true; + options.blob_file_size = 32; // one blob per file + options.enable_blob_garbage_collection = true; + options.blob_garbage_collection_age_cutoff = blob_gc_age_cutoff_; + + Reopen(options); + + constexpr char first_key[] = "first_key"; + constexpr char first_value[] = "first_value"; + constexpr char second_key[] = "second_key"; + constexpr char second_value[] = "second_value"; + + ASSERT_OK(Put(first_key, first_value)); + ASSERT_OK(Put(second_key, second_value)); + ASSERT_OK(Flush()); + + constexpr char third_key[] = "third_key"; + constexpr char third_value[] = "third_value"; + constexpr char fourth_key[] = "fourth_key"; + constexpr char fourth_value[] = "fourth_value"; + + ASSERT_OK(Put(third_key, third_value)); + ASSERT_OK(Put(fourth_key, fourth_value)); + ASSERT_OK(Flush()); + + const std::vector original_blob_files = GetBlobFileNumbers(); + + ASSERT_EQ(original_blob_files.size(), 4); + + const size_t cutoff_index = static_cast( + options.blob_garbage_collection_age_cutoff * original_blob_files.size()); + + // Note: turning off enable_blob_files before the compaction results in + // garbage collected values getting inlined. 
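// Worked example of the cutoff arithmetic that follows, assuming the four
// blob files asserted above and blob_gc_age_cutoff_ == 0.5:
//   cutoff_index = size_t(0.5 * 4) = 2, so the two oldest blob files are
//   eligible for garbage collection;
//   with enable_blob_files switched off first, the relocated values are
//   inlined rather than rewritten, so expected_number_of_files = 4 - 2 = 2.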
+ size_t expected_number_of_files = original_blob_files.size(); + + if (!updated_enable_blob_files_) { + ASSERT_OK(db_->SetOptions({{"enable_blob_files", "false"}})); + + expected_number_of_files -= cutoff_index; + } + + constexpr Slice* begin = nullptr; + constexpr Slice* end = nullptr; + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); + + ASSERT_EQ(Get(first_key), first_value); + ASSERT_EQ(Get(second_key), second_value); + ASSERT_EQ(Get(third_key), third_value); + ASSERT_EQ(Get(fourth_key), fourth_value); + + const std::vector<uint64_t> new_blob_files = GetBlobFileNumbers(); + + ASSERT_EQ(new_blob_files.size(), expected_number_of_files); + + // Original blob files below the cutoff should be gone, original blob files at + // or above the cutoff should still be there + for (size_t i = cutoff_index; i < original_blob_files.size(); ++i) { + ASSERT_EQ(new_blob_files[i - cutoff_index], original_blob_files[i]); + } + + VersionSet* const versions = dbfull()->GetVersionSet(); + assert(versions); + assert(versions->GetColumnFamilySet()); + + ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault(); + assert(cfd); + + const InternalStats* const internal_stats = cfd->internal_stats(); + assert(internal_stats); + + const auto& compaction_stats = internal_stats->TEST_GetCompactionStats(); + ASSERT_GE(compaction_stats.size(), 2); + + if (blob_gc_age_cutoff_ > 0.0) { + ASSERT_GT(compaction_stats[1].bytes_read_blob, 0); + + if (updated_enable_blob_files_) { + // GC relocated some blobs to new blob files + ASSERT_GT(compaction_stats[1].bytes_written_blob, 0); + ASSERT_EQ(compaction_stats[1].bytes_read_blob, + compaction_stats[1].bytes_written_blob); + } else { + // GC moved some blobs back to the LSM, no new blob files + ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); + } + } else { + ASSERT_EQ(compaction_stats[1].bytes_read_blob, 0); + ASSERT_EQ(compaction_stats[1].bytes_written_blob, 0); + } +} + +TEST_F(DBCompactionTest, CompactionWithBlobGCError_CorruptIndex) { + Options options; + options.env = env_; + options.disable_auto_compactions = true; + options.enable_blob_files = true; + options.enable_blob_garbage_collection = true; + options.blob_garbage_collection_age_cutoff = 1.0; + + Reopen(options); + + constexpr char first_key[] = "first_key"; + constexpr char first_value[] = "first_value"; + ASSERT_OK(Put(first_key, first_value)); + + constexpr char second_key[] = "second_key"; + constexpr char second_value[] = "second_value"; + ASSERT_OK(Put(second_key, second_value)); + + ASSERT_OK(Flush()); + + constexpr char third_key[] = "third_key"; + constexpr char third_value[] = "third_value"; + ASSERT_OK(Put(third_key, third_value)); + + constexpr char fourth_key[] = "fourth_key"; + constexpr char corrupt_blob_index[] = "foobar"; + + WriteBatch batch; + ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key, + corrupt_blob_index)); + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + ASSERT_OK(Flush()); + + constexpr Slice* begin = nullptr; + constexpr Slice* end = nullptr; + + ASSERT_TRUE( + db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption()); +} + +TEST_F(DBCompactionTest, CompactionWithBlobGCError_InlinedTTLIndex) { + constexpr uint64_t min_blob_size = 10; + + Options options; + options.env = env_; + options.disable_auto_compactions = true; + options.enable_blob_files = true; + options.min_blob_size = min_blob_size; + options.enable_blob_garbage_collection = true; + options.blob_garbage_collection_age_cutoff = 1.0; + + Reopen(options); + +
constexpr char first_key[] = "first_key"; + constexpr char first_value[] = "first_value"; + ASSERT_OK(Put(first_key, first_value)); + + constexpr char second_key[] = "second_key"; + constexpr char second_value[] = "second_value"; + ASSERT_OK(Put(second_key, second_value)); + + ASSERT_OK(Flush()); + + constexpr char third_key[] = "third_key"; + constexpr char third_value[] = "third_value"; + ASSERT_OK(Put(third_key, third_value)); + + constexpr char fourth_key[] = "fourth_key"; + constexpr char blob[] = "short"; + static_assert(sizeof(short) - 1 < min_blob_size, + "Blob too long to be inlined"); + + // Fake an inlined TTL blob index. + std::string blob_index; + + constexpr uint64_t expiration = 1234567890; + + BlobIndex::EncodeInlinedTTL(&blob_index, expiration, blob); + + WriteBatch batch; + ASSERT_OK( + WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key, blob_index)); + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + ASSERT_OK(Flush()); + + constexpr Slice* begin = nullptr; + constexpr Slice* end = nullptr; + + ASSERT_TRUE( + db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption()); +} + +TEST_F(DBCompactionTest, CompactionWithBlobGCError_IndexWithInvalidFileNumber) { + Options options; + options.env = env_; + options.disable_auto_compactions = true; + options.enable_blob_files = true; + options.enable_blob_garbage_collection = true; + options.blob_garbage_collection_age_cutoff = 1.0; + + Reopen(options); + + constexpr char first_key[] = "first_key"; + constexpr char first_value[] = "first_value"; + ASSERT_OK(Put(first_key, first_value)); + + constexpr char second_key[] = "second_key"; + constexpr char second_value[] = "second_value"; + ASSERT_OK(Put(second_key, second_value)); + + ASSERT_OK(Flush()); + + constexpr char third_key[] = "third_key"; + constexpr char third_value[] = "third_value"; + ASSERT_OK(Put(third_key, third_value)); + + constexpr char fourth_key[] = "fourth_key"; + + // Fake a blob index referencing a non-existent blob file. 
+ std::string blob_index; + + constexpr uint64_t blob_file_number = 1000; + constexpr uint64_t offset = 1234; + constexpr uint64_t size = 5678; + + BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size, + kNoCompression); + + WriteBatch batch; + ASSERT_OK( + WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key, blob_index)); + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + ASSERT_OK(Flush()); + + constexpr Slice* begin = nullptr; + constexpr Slice* end = nullptr; + + ASSERT_TRUE( + db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption()); +} + +TEST_F(DBCompactionTest, CompactionWithChecksumHandoff1) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + std::shared_ptr<FaultInjectionTestFS> fault_fs( + new FaultInjectionTestFS(FileSystem::Default())); + std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs)); + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 2; + options.num_levels = 3; + options.env = fault_fs_env.get(); + options.create_if_missing = true; + options.checksum_handoff_file_types.Add(FileType::kTableFile); + Status s; + Reopen(options); + + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + Destroy(options); + Reopen(options); + + // The hash does not match, compaction write fails + // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); + // Since the file system returns IOStatus::Corruption, it is an + // unrecoverable error. + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::FlushMemTable:FlushMemTableFinished", + "BackgroundCallCompaction:0"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", [&](void*) { + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s.severity(), + ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); + SyncPoint::GetInstance()->DisableProcessing(); + Destroy(options); + Reopen(options); + + // The file system does not support checksum handoff. The check + // will be ignored. + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + + // Each write will be simulated as corrupted. + // Since the file system returns IOStatus::Corruption, it is an + // unrecoverable error.
+ fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::FlushMemTable:FlushMemTableFinished", + "BackgroundCallCompaction:0"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", + [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s.severity(), + ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError); + SyncPoint::GetInstance()->DisableProcessing(); + + Destroy(options); +} + +TEST_F(DBCompactionTest, CompactionWithChecksumHandoff2) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + std::shared_ptr fault_fs( + new FaultInjectionTestFS(FileSystem::Default())); + std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 2; + options.num_levels = 3; + options.env = fault_fs_env.get(); + options.create_if_missing = true; + Status s; + Reopen(options); + + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + Destroy(options); + Reopen(options); + + // options is not set, the checksum handoff will not be triggered + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::FlushMemTable:FlushMemTableFinished", + "BackgroundCallCompaction:0"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", [&](void*) { + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + SyncPoint::GetInstance()->DisableProcessing(); + Destroy(options); + Reopen(options); + + // The file system does not support checksum handoff. The check + // will be ignored. 
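// A minimal sketch of the opt-in being tested: checksum handoff is enabled
// per file type before the DB is opened; kTableFile covers SSTs and
// kDescriptorFile covers the MANIFEST.
Options handoff_opts = CurrentOptions();
handoff_opts.checksum_handoff_file_types.Add(FileType::kTableFile);
handoff_opts.checksum_handoff_file_types.Add(FileType::kDescriptorFile);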
+ fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + + // options is not set, the checksum handoff will not be triggered + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::FlushMemTable:FlushMemTableFinished", + "BackgroundCallCompaction:0"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", + [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + + Destroy(options); +} + +TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest1) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + std::shared_ptr fault_fs( + new FaultInjectionTestFS(FileSystem::Default())); + std::unique_ptr fault_fs_env(NewCompositeEnv(fault_fs)); + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 2; + options.num_levels = 3; + options.env = fault_fs_env.get(); + options.create_if_missing = true; + options.checksum_handoff_file_types.Add(FileType::kDescriptorFile); + Status s; + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + Reopen(options); + + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + Destroy(options); + Reopen(options); + + // The hash does not match, compaction write fails + // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); + // Since the file system returns IOStatus::Corruption, it is mapped to + // kFatalError error. 
+ ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::FlushMemTable:FlushMemTableFinished", + "BackgroundCallCompaction:0"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", [&](void*) { + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash); + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); + SyncPoint::GetInstance()->DisableProcessing(); + Destroy(options); +} + +TEST_F(DBCompactionTest, CompactionWithChecksumHandoffManifest2) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + std::shared_ptr<FaultInjectionTestFS> fault_fs( + new FaultInjectionTestFS(FileSystem::Default())); + std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs)); + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = 2; + options.num_levels = 3; + options.env = fault_fs_env.get(); + options.create_if_missing = true; + options.checksum_handoff_file_types.Add(FileType::kDescriptorFile); + Status s; + fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum); + Reopen(options); + + // The file system does not support checksum handoff. The check + // will be ignored. + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s, Status::OK()); + + // Each write will be simulated as corrupted. + // Since the file system returns IOStatus::Corruption, it is mapped to + // kFatalError error.
+ fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); + ASSERT_OK(Put(Key(0), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::FlushMemTable:FlushMemTableFinished", + "BackgroundCallCompaction:0"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", + [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "value3")); + s = Flush(); + ASSERT_EQ(s, Status::OK()); + s = dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError); + SyncPoint::GetInstance()->DisableProcessing(); + + Destroy(options); +} + +TEST_F(DBCompactionTest, FIFOWarm) { + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleFIFO; + options.num_levels = 1; + options.max_open_files = -1; + options.level0_file_num_compaction_trigger = 2; + options.create_if_missing = true; + CompactionOptionsFIFO fifo_options; + fifo_options.age_for_warm = 1000; + fifo_options.max_table_files_size = 100000000; + options.compaction_options_fifo = fifo_options; + env_->SetMockSleep(); + Reopen(options); + + int total_warm = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "NewWritableFile::FileOptions.temperature", [&](void* arg) { + Temperature temperature = *(static_cast<Temperature*>(arg)); + if (temperature == Temperature::kWarm) { + total_warm++; + } + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_OK(Put(Key(0), "value1")); + env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + + ASSERT_OK(Put(Key(0), "value1")); + env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + + ASSERT_OK(Put(Key(0), "value1")); + env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + ASSERT_OK(Put(Key(0), "value1")); + env_->MockSleepForSeconds(800); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + + ColumnFamilyMetaData metadata; + db_->GetColumnFamilyMetaData(&metadata); + ASSERT_EQ(4, metadata.file_count); + ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[0].temperature); + ASSERT_EQ(Temperature::kUnknown, metadata.levels[0].files[1].temperature); + ASSERT_EQ(Temperature::kWarm, metadata.levels[0].files[2].temperature); + ASSERT_EQ(Temperature::kWarm, metadata.levels[0].files[3].temperature); + ASSERT_EQ(2, total_warm); + + Destroy(options); +} + +TEST_F(DBCompactionTest, DisableMultiManualCompaction) { + const int kNumL0Files = 10; + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + // Generate 2 levels of files to make sure the manual compaction is not skipped + for (int i = 0; i < 10; i++) { + ASSERT_OK(Put(Key(i), "value")); + if (i % 2) { + ASSERT_OK(Flush()); + } + } + MoveFilesToLevel(2); + + for (int i = 0; i < 10; i++) { + ASSERT_OK(Put(Key(i), "value")); + if (i % 2) { + ASSERT_OK(Flush()); + } + } + MoveFilesToLevel(1); + + // Block compaction queue + test::SleepingBackgroundTask sleeping_task_low; +
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + + port::Thread compact_thread1([&]() { + CompactRangeOptions cro; + cro.exclusive_manual_compaction = false; + std::string begin_str = Key(0); + std::string end_str = Key(3); + Slice b = begin_str; + Slice e = end_str; + auto s = db_->CompactRange(cro, &b, &e); + ASSERT_TRUE(s.IsIncomplete()); + }); + + port::Thread compact_thread2([&]() { + CompactRangeOptions cro; + cro.exclusive_manual_compaction = false; + std::string begin_str = Key(4); + std::string end_str = Key(7); + Slice b = begin_str; + Slice e = end_str; + auto s = db_->CompactRange(cro, &b, &e); + ASSERT_TRUE(s.IsIncomplete()); + }); + + // Disabling manual compaction should cancel both manual compactions, and + // both should return Incomplete. + db_->DisableManualCompaction(); + + compact_thread1.join(); + compact_thread2.join(); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); + ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); +} + +TEST_F(DBCompactionTest, DisableJustStartedManualCompaction) { + const int kNumL0Files = 4; + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + // generate files, but avoid triggering auto compaction + for (int i = 0; i < kNumL0Files / 2; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + + // make sure the manual compaction background is started but has not yet set + // the status to in_progress, then cancel the manual compaction, which should + // not result in a segfault + SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::BGWorkCompaction", + "DBCompactionTest::DisableJustStartedManualCompaction:" + "PreDisableManualCompaction"}, + {"DBImpl::RunManualCompaction:Unscheduled", + "BackgroundCallCompaction:0"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + port::Thread compact_thread([&]() { + CompactRangeOptions cro; + cro.exclusive_manual_compaction = true; + auto s = db_->CompactRange(cro, nullptr, nullptr); + ASSERT_TRUE(s.IsIncomplete()); + }); + TEST_SYNC_POINT( + "DBCompactionTest::DisableJustStartedManualCompaction:" + "PreDisableManualCompaction"); + db_->DisableManualCompaction(); + + compact_thread.join(); +} + +TEST_F(DBCompactionTest, DisableInProgressManualCompaction) { + const int kNumL0Files = 4; + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::BackgroundCompaction:InProgress", + "DBCompactionTest::DisableInProgressManualCompaction:" + "PreDisableManualCompaction"}, + {"DBImpl::RunManualCompaction:Unscheduled", + "CompactionJob::Run():Start"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + // generate files, but avoid triggering auto compaction + for (int i = 0; i < kNumL0Files / 2; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + + port::Thread compact_thread([&]() { + CompactRangeOptions cro; + cro.exclusive_manual_compaction = true; + auto s = db_->CompactRange(cro, nullptr, nullptr); + ASSERT_TRUE(s.IsIncomplete()); + }); + + TEST_SYNC_POINT( + "DBCompactionTest::DisableInProgressManualCompaction:" + "PreDisableManualCompaction"); + db_->DisableManualCompaction(); + + compact_thread.join(); +} + +TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFull) { + const int kNumL0Files = 4; + +
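// A minimal sketch of the queue-blocking idiom these cancellation tests
// share: parking a SleepingBackgroundTask on the LOW-priority pool occupies
// the compaction thread, so any compaction scheduled afterwards stays queued
// until WakeUp() is called.
test::SleepingBackgroundTask blocker;
env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &blocker,
               Env::Priority::LOW);
// ... issue the CompactRange() that must stay pending, then cancel it ...
blocker.WakeUp();
blocker.WaitUntilDone();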
SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::RunManualCompaction:Scheduled", + "DBCompactionTest::DisableManualCompactionThreadQueueFull:" + "PreDisableManualCompaction"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + // Block compaction queue + test::SleepingBackgroundTask sleeping_task_low; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + + // generate files, but avoid triggering auto compaction + for (int i = 0; i < kNumL0Files / 2; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + + port::Thread compact_thread([&]() { + CompactRangeOptions cro; + cro.exclusive_manual_compaction = true; + auto s = db_->CompactRange(cro, nullptr, nullptr); + ASSERT_TRUE(s.IsIncomplete()); + }); + + TEST_SYNC_POINT( + "DBCompactionTest::DisableManualCompactionThreadQueueFull:" + "PreDisableManualCompaction"); + + // Generate more files to trigger auto compaction, which is scheduled after + // the manual compaction. We have to generate 4 more files because the + // existing files are pending compaction + for (int i = 0; i < kNumL0Files; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + ASSERT_EQ(ToString(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0)); + + db_->DisableManualCompaction(); + + // CompactRange should return before the compaction has the chance to run + compact_thread.join(); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); + ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); + ASSERT_EQ("0,1", FilesPerLevel(0)); +} + +TEST_F(DBCompactionTest, DisableManualCompactionThreadQueueFullDBClose) { + const int kNumL0Files = 4; + + SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::RunManualCompaction:Scheduled", + "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" + "PreDisableManualCompaction"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + // Block compaction queue + test::SleepingBackgroundTask sleeping_task_low; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + + // generate files, but avoid triggering auto compaction + for (int i = 0; i < kNumL0Files / 2; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + + port::Thread compact_thread([&]() { + CompactRangeOptions cro; + cro.exclusive_manual_compaction = true; + auto s = db_->CompactRange(cro, nullptr, nullptr); + ASSERT_TRUE(s.IsIncomplete()); + }); + + TEST_SYNC_POINT( + "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" + "PreDisableManualCompaction"); + + // Generate more files to trigger auto compaction, which is scheduled after + // the manual compaction. We have to generate 4 more files because the + // existing files are pending compaction + for (int i = 0; i < kNumL0Files; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + ASSERT_EQ(ToString(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0)); + + db_->DisableManualCompaction(); + + // CompactRange should return before the compaction has the chance to run + compact_thread.join(); + + // Try to close the DB while the manual compaction is canceled but still in the queue.
+ // And an auto-triggered compaction is also in the queue. + auto s = db_->Close(); + ASSERT_OK(s); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); +} + +TEST_F(DBCompactionTest, DBCloseWithManualCompaction) { + const int kNumL0Files = 4; + + SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::RunManualCompaction:Scheduled", + "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" + "PreDisableManualCompaction"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + // Block compaction queue + test::SleepingBackgroundTask sleeping_task_low; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + + // generate files, but avoid triggering auto compaction + for (int i = 0; i < kNumL0Files / 2; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + + port::Thread compact_thread([&]() { + CompactRangeOptions cro; + cro.exclusive_manual_compaction = true; + auto s = db_->CompactRange(cro, nullptr, nullptr); + ASSERT_TRUE(s.IsIncomplete()); + }); + + TEST_SYNC_POINT( + "DBCompactionTest::DisableManualCompactionThreadQueueFullDBClose:" + "PreDisableManualCompaction"); + + // Generate more files to trigger auto compaction, which is scheduled after + // the manual compaction. We have to generate 4 more files because the + // existing files are pending compaction + for (int i = 0; i < kNumL0Files; i++) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + ASSERT_EQ(ToString(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0)); + + // Close the DB with a manual compaction and an auto-triggered compaction in + // the queue. + auto s = db_->Close(); + ASSERT_OK(s); + + // The manual compaction thread should return with Incomplete(). + compact_thread.join(); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); +} + +TEST_F(DBCompactionTest, + DisableManualCompactionDoesNotWaitForDrainingAutomaticCompaction) { + // When `CompactRangeOptions::exclusive_manual_compaction == true`, we wait + // for automatic compactions to drain before starting the manual compaction. + // This test verifies `DisableManualCompaction()` can cancel such a compaction + // without waiting for the drain to complete. + const int kNumL0Files = 4; + + // Ensures the manual compaction enters the wait loop due to a pending + // automatic compaction. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::BGWorkCompaction", "DBImpl::RunManualCompaction:NotScheduled"}, + {"DBImpl::RunManualCompaction:WaitScheduled", + "BackgroundCallCompaction:0"}}); + // The automatic compaction will cancel the waiting manual compaction. + // Completing this implies the cancellation did not wait on automatic + // compactions to finish.
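+ // (Editorial aside, not part of the upstream test: the caller-side
+ // cancellation pattern exercised here is, in outline, built from public
+ // rocksdb::DB calls only — CompactRange, DisableManualCompaction and
+ // EnableManualCompaction:
+ //
+ //   std::thread worker([&] {
+ //     CompactRangeOptions cro;
+ //     cro.exclusive_manual_compaction = true;  // may wait for auto drain
+ //     Status s = db_->CompactRange(cro, nullptr, nullptr);
+ //     assert(s.ok() || s.IsIncomplete());  // Incomplete when canceled
+ //   });
+ //   db_->DisableManualCompaction();  // unblocks the waiting CompactRange
+ //   worker.join();
+ //   db_->EnableManualCompaction();   // re-allow manual compactions
+ // )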
+ bool callback_completed = false; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "BackgroundCallCompaction:0", [&](void* /*arg*/) { + db_->DisableManualCompaction(); + callback_completed = true; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + Reopen(options); + + for (int i = 0; i < kNumL0Files; ++i) { + ASSERT_OK(Put(Key(1), "value1")); + ASSERT_OK(Put(Key(2), "value2")); + ASSERT_OK(Flush()); + } + + CompactRangeOptions cro; + cro.exclusive_manual_compaction = true; + ASSERT_TRUE(db_->CompactRange(cro, nullptr, nullptr).IsIncomplete()); + + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_TRUE(callback_completed); +} + +TEST_F(DBCompactionTest, ChangeLevelConflictsWithManual) { + Options options = CurrentOptions(); + options.num_levels = 3; + Reopen(options); + + // Set up an LSM with L2 populated. + Random rnd(301); + ASSERT_OK(Put(Key(0), rnd.RandomString(990))); + ASSERT_OK(Put(Key(1), rnd.RandomString(990))); + { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 2; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + } + ASSERT_EQ("0,0,1", FilesPerLevel(0)); + + // The background thread will refit L2->L1 while the foreground thread will + // attempt to run a compaction on new data. The following dependencies + // ensure the background manual compaction's refitting phase disables manual + // compaction immediately before the foreground manual compaction can register + // itself. Manual compaction is kept disabled until the foreground manual + // compaction checks for the failure once. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ + // Only do Put()s for foreground CompactRange() once the background + // CompactRange() has reached the refitting phase. + { + "DBImpl::CompactRange:BeforeRefit:1", + "DBCompactionTest::ChangeLevelConflictsWithManual:" + "PreForegroundCompactRange", + }, + // Right before we register the manual compaction, proceed with + // the refitting phase so manual compactions are disabled. Stay in + // the refitting phase with manual compactions disabled until it is + // noticed. + { + "DBImpl::RunManualCompaction:0", + "DBImpl::CompactRange:BeforeRefit:2", + }, + { + "DBImpl::CompactRange:PreRefitLevel", + "DBImpl::RunManualCompaction:1", + }, + { + "DBImpl::RunManualCompaction:PausedAtStart", + "DBImpl::CompactRange:PostRefitLevel", + }, + // If a compaction somehow were scheduled, let it run after re-enabling + // manual compactions. This dependency is not expected to be hit but is + // here to speculatively catch future bugs.
+ { + "DBImpl::CompactRange:PostRefitLevel:ManualCompactionEnabled", + "BackgroundCallCompaction:0", + }, + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + ROCKSDB_NAMESPACE::port::Thread refit_level_thread([&] { + CompactRangeOptions cro; + cro.change_level = true; + cro.target_level = 1; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + }); + + TEST_SYNC_POINT( + "DBCompactionTest::ChangeLevelConflictsWithManual:" + "PreForegroundCompactRange"); + ASSERT_OK(Put(Key(0), rnd.RandomString(990))); + ASSERT_OK(Put(Key(1), rnd.RandomString(990))); + ASSERT_TRUE(dbfull() + ->CompactRange(CompactRangeOptions(), nullptr, nullptr) + .IsIncomplete()); + + refit_level_thread.join(); +} + +TEST_F(DBCompactionTest, BottomPriCompactionCountsTowardConcurrencyLimit) { + // Flushes several files to trigger a compaction while the lock is released + // during a bottom-pri compaction. Verifies it does not get scheduled to the + // thread pool because the per-DB limit for compaction parallelism is one + // (the default). + const int kNumL0Files = 4; + const int kNumLevels = 3; + + env_->SetBackgroundThreads(1, Env::Priority::BOTTOM); + + Options options = CurrentOptions(); + options.level0_file_num_compaction_trigger = kNumL0Files; + options.num_levels = kNumLevels; + DestroyAndReopen(options); + + // Set up the last level to be non-empty since it's a bit unclear whether + // compaction to an empty level would be considered "bottommost". + ASSERT_OK(Put(Key(0), "val")); + ASSERT_OK(Flush()); + MoveFilesToLevel(kNumLevels - 1); + + SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::BGWorkBottomCompaction", + "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" + "PreTriggerCompaction"}, + {"DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" + "PostTriggerCompaction", + "BackgroundCallCompaction:0"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + port::Thread compact_range_thread([&] { + CompactRangeOptions cro; + cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; + cro.exclusive_manual_compaction = false; + ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); + }); + + // Sleep in the low-pri thread so any newly scheduled compaction will be + // queued. Otherwise it might finish before we check its existence.
+ test::SleepingBackgroundTask sleeping_task_low; + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + + TEST_SYNC_POINT( + "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" + "PreTriggerCompaction"); + for (int i = 0; i < kNumL0Files; ++i) { + ASSERT_OK(Put(Key(0), "val")); + ASSERT_OK(Flush()); + } + ASSERT_EQ(0u, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); + TEST_SYNC_POINT( + "DBCompactionTest::BottomPriCompactionCountsTowardConcurrencyLimit:" + "PostTriggerCompaction"); + + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); + compact_range_thread.join(); +} + +#endif // !defined(ROCKSDB_LITE) + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_dynamic_level_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_dynamic_level_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_dynamic_level_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_dynamic_level_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -15,11 +15,14 @@ #include "db/db_test_util.h" #include "port/port.h" #include "port/stack_trace.h" +#include "rocksdb/env.h" +#include "util/random.h" namespace ROCKSDB_NAMESPACE { class DBTestDynamicLevel : public DBTestBase { public: - DBTestDynamicLevel() : DBTestBase("/db_dynamic_level_test") {} + DBTestDynamicLevel() + : DBTestBase("db_dynamic_level_test", /*env_do_fsync=*/true) {} }; TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase) { @@ -27,7 +30,7 @@ return; } // Use InMemoryEnv, or it would be too slow. - std::unique_ptr<Env> env(new MockEnv(env_)); + std::unique_ptr<Env> env(NewMemEnv(env_)); const int kNKeys = 1000; int keys[kNKeys]; @@ -50,7 +53,7 @@ keys[i] = i; } if (ordered_insert == 0) { - std::random_shuffle(std::begin(keys), std::end(keys)); + RandomShuffle(std::begin(keys), std::end(keys), rnd.Next()); } for (int max_background_compactions = 1; max_background_compactions < 4; max_background_compactions += 2) { @@ -80,9 +83,9 @@ for (int i = 0; i < kNKeys; i++) { int key = keys[i]; - ASSERT_OK(Put(Key(kNKeys + key), RandomString(&rnd, 102))); - ASSERT_OK(Put(Key(key), RandomString(&rnd, 102))); - ASSERT_OK(Put(Key(kNKeys * 2 + key), RandomString(&rnd, 102))); + ASSERT_OK(Put(Key(kNKeys + key), rnd.RandomString(102))); + ASSERT_OK(Put(Key(key), rnd.RandomString(102))); + ASSERT_OK(Put(Key(kNKeys * 2 + key), rnd.RandomString(102))); ASSERT_OK(Delete(Key(kNKeys + keys[i / 10]))); env_->SleepForMicroseconds(5000); } @@ -100,7 +103,8 @@ } // Test compact range works - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK( + dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + // All data should be in the last level.
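// (Editorial aside, not from the upstream test: with
// level_compaction_dynamic_level_bytes = true, per-level target sizes are
// derived from the actual size of the bottommost level rather than from L1
// downward. Assuming max_bytes_for_level_multiplier = 10 and a last level
// holding roughly 1 GB, the level above it is targeted at roughly 100 MB,
// the next at roughly 10 MB, and levels whose target would fall below
// max_bytes_for_level_base stay empty; the "rocksdb.base-level" property
// queried in these tests reports the first non-empty level that results.)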
ColumnFamilyMetaData cf_meta; db_->GetColumnFamilyMetaData(&cf_meta); @@ -139,6 +143,7 @@ options.max_background_compactions = 2; options.num_levels = 5; options.max_compaction_bytes = 0; // Force not expanding in compactions + options.db_host_id = ""; // Setting this messes up the file size calculation BlockBasedTableOptions table_options; table_options.block_size = 1024; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); @@ -158,13 +163,13 @@ // Put about 28K to L0 for (int i = 0; i < 70; i++) { ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), - RandomString(&rnd, 380))); + rnd.RandomString(380))); } ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(4U, int_prop); @@ -175,14 +180,14 @@ })); for (int i = 0; i < 70; i++) { ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), - RandomString(&rnd, 380))); + rnd.RandomString(380))); } ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(3U, int_prop); ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level1", &str_prop)); @@ -197,13 +202,13 @@ // Write about 40K more for (int i = 0; i < 100; i++) { ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), - RandomString(&rnd, 380))); + rnd.RandomString(380))); } ASSERT_OK(dbfull()->SetOptions({ {"disable_auto_compactions", "false"}, })); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(3U, int_prop); @@ -216,7 +221,7 @@ // Each file is about 11KB, with 9KB of data.
for (int i = 0; i < 1300; i++) { ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), - RandomString(&rnd, 380))); + rnd.RandomString(380))); } // Make sure that the compaction starts before the last bit of data is @@ -231,8 +236,8 @@ })); TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:0"); - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); ASSERT_EQ(2U, int_prop); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); @@ -257,11 +262,11 @@ TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:1"); for (int i = 0; i < 2; i++) { ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), - RandomString(&rnd, 380))); + rnd.RandomString(380))); } TEST_SYNC_POINT("DynamicLevelMaxBytesBase2:2"); - Flush(); + ASSERT_OK(Flush()); thread.join(); @@ -299,7 +304,7 @@ DestroyAndReopen(options); // Compact against empty DB - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); uint64_t int_prop; std::string str_prop; @@ -310,16 +315,16 @@ // Put about 7K to L0 for (int i = 0; i < 140; i++) { - ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), - RandomString(&rnd, 80))); + ASSERT_OK( + Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), rnd.RandomString(80))); } - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); if (NumTableFilesAtLevel(0) == 0) { // Make sure level 0 is not empty - ASSERT_OK(Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), - RandomString(&rnd, 80))); - Flush(); + ASSERT_OK( + Put(Key(static_cast<int>(rnd.Uniform(kMaxKey))), rnd.RandomString(80))); + ASSERT_OK(Flush()); } ASSERT_TRUE(db_->GetIntProperty("rocksdb.base-level", &int_prop)); @@ -340,7 +345,7 @@ }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(output_levels.size(), 2); ASSERT_TRUE(output_levels.find(3) != output_levels.end()); ASSERT_TRUE(output_levels.find(4) != output_levels.end()); @@ -382,12 +387,12 @@ const int total_keys = 3000; const int random_part_size = 100; for (int i = 0; i < total_keys; i++) { - std::string value = RandomString(&rnd, random_part_size); + std::string value = rnd.RandomString(random_part_size); PutFixed32(&value, static_cast<uint32_t>(i)); ASSERT_OK(Put(Key(i), value)); } - Flush(); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_EQ(non_trivial, 0); @@ -441,12 +446,12 @@ int total_keys = 1000; for (int i = 0; i < total_keys; i++) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, 102))); - ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102))); + ASSERT_OK(Put(Key(i), rnd.RandomString(102))); + ASSERT_OK(Put(Key(kMaxKey + i), rnd.RandomString(102))); ASSERT_OK(Delete(Key(i / 10))); } verify_func(total_keys, false); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); options.level_compaction_dynamic_level_bytes = true; options.disable_auto_compactions = true; @@ -461,7 +466,7 @@ CompactRangeOptions compact_options; compact_options.change_level = true; compact_options.target_level = options.num_levels - 1; - dbfull()->CompactRange(compact_options, nullptr, nullptr); + ASSERT_OK(dbfull()->CompactRange(compact_options, nullptr,
nullptr)); compaction_finished.store(true); }); do { @@ -475,13 +480,13 @@ int total_keys2 = 2000; for (int i = total_keys; i < total_keys2; i++) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, 102))); - ASSERT_OK(Put(Key(kMaxKey + i), RandomString(&rnd, 102))); + ASSERT_OK(Put(Key(i), rnd.RandomString(102))); + ASSERT_OK(Put(Key(kMaxKey + i), rnd.RandomString(102))); ASSERT_OK(Delete(Key(i / 10))); } verify_func(total_keys2, false); - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); verify_func(total_keys2, false); // Base level is not level 1 diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_encryption_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_encryption_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_encryption_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_encryption_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -16,7 +16,15 @@ class DBEncryptionTest : public DBTestBase { public: - DBEncryptionTest() : DBTestBase("/db_encryption_test") {} + DBEncryptionTest() + : DBTestBase("db_encryption_test", /*env_do_fsync=*/true) {} + Env* GetTargetEnv() { + if (encrypted_env_ != nullptr) { + return (static_cast<EnvWrapper*>(encrypted_env_))->target(); + } else { + return env_; + } + } }; #ifndef ROCKSDB_LITE @@ -33,20 +41,20 @@ auto status = env_->GetChildren(dbname_, &fileNames); ASSERT_OK(status); - auto defaultEnv = Env::Default(); + Env* target = GetTargetEnv(); int hits = 0; for (auto it = fileNames.begin() ; it != fileNames.end(); ++it) { - if ((*it == "..") || (*it == ".")) { + if (*it == "LOCK") { continue; } auto filePath = dbname_ + "/" + *it; std::unique_ptr<SequentialFile> seqFile; auto envOptions = EnvOptions(CurrentOptions()); - status = defaultEnv->NewSequentialFile(filePath, &seqFile, envOptions); + status = target->NewSequentialFile(filePath, &seqFile, envOptions); ASSERT_OK(status); uint64_t fileSize; - status = defaultEnv->GetFileSize(filePath, &fileSize); + status = target->GetFileSize(filePath, &fileSize); ASSERT_OK(status); std::string scratch; @@ -84,7 +92,7 @@ } TEST_F(DBEncryptionTest, ReadEmptyFile) { - auto defaultEnv = Env::Default(); + auto defaultEnv = GetTargetEnv(); // create empty file for reading it back in later auto envOptions = EnvOptions(CurrentOptions()); diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_filesnapshot.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_filesnapshot.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_filesnapshot.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_filesnapshot.cc 2025-05-19 16:14:27.000000000 +0000 @@ -6,72 +6,62 @@ #ifndef ROCKSDB_LITE -#include #include -#include +#include +#include #include +#include + #include "db/db_impl/db_impl.h" #include "db/job_context.h" #include "db/version_set.h" #include "file/file_util.h" #include "file/filename.h" +#include "logging/logging.h" #include "port/port.h" #include "rocksdb/db.h" #include "rocksdb/env.h" +#include "rocksdb/metadata.h" +#include "rocksdb/types.h" #include "test_util/sync_point.h" +#include "util/file_checksum_helper.h" #include "util/mutexlock.h" namespace ROCKSDB_NAMESPACE { -Status DBImpl::DisableFileDeletions() { - InstrumentedMutexLock l(&mutex_); - ++disable_delete_obsolete_files_; - if (disable_delete_obsolete_files_ == 1) { - ROCKS_LOG_INFO(immutable_db_options_.info_log, "File Deletions Disabled"); - } else { - ROCKS_LOG_WARN(immutable_db_options_.info_log, - "File Deletions Disabled, but already disabled.
Counter: %d", - disable_delete_obsolete_files_); - } - return Status::OK(); -} +Status DBImpl::FlushForGetLiveFiles() { + mutex_.AssertHeld(); -Status DBImpl::EnableFileDeletions(bool force) { - // Job id == 0 means that this is not our background process, but rather - // user thread - JobContext job_context(0); - bool file_deletion_enabled = false; - { - InstrumentedMutexLock l(&mutex_); - if (force) { - // if force, we need to enable file deletions right away - disable_delete_obsolete_files_ = 0; - } else if (disable_delete_obsolete_files_ > 0) { - --disable_delete_obsolete_files_; - } - if (disable_delete_obsolete_files_ == 0) { - file_deletion_enabled = true; - FindObsoleteFiles(&job_context, true); - bg_cv_.SignalAll(); - } - } - if (file_deletion_enabled) { - ROCKS_LOG_INFO(immutable_db_options_.info_log, "File Deletions Enabled"); - if (job_context.HaveSomethingToDelete()) { - PurgeObsoleteFiles(job_context); + // flush all dirty data to disk. + Status status; + if (immutable_db_options_.atomic_flush) { + autovector<ColumnFamilyData*> cfds; + SelectColumnFamiliesForAtomicFlush(&cfds); + mutex_.Unlock(); + status = + AtomicFlushMemTables(cfds, FlushOptions(), FlushReason::kGetLiveFiles); + if (status.IsColumnFamilyDropped()) { + status = Status::OK(); } + mutex_.Lock(); } else { - ROCKS_LOG_WARN(immutable_db_options_.info_log, - "File Deletions Enable, but not really enabled. Counter: %d", - disable_delete_obsolete_files_); + for (auto cfd : versions_->GetRefedColumnFamilySet()) { + if (cfd->IsDropped()) { + continue; + } + mutex_.Unlock(); + status = FlushMemTable(cfd, FlushOptions(), FlushReason::kGetLiveFiles); + TEST_SYNC_POINT("DBImpl::GetLiveFiles:1"); + TEST_SYNC_POINT("DBImpl::GetLiveFiles:2"); + mutex_.Lock(); + if (!status.ok() && !status.IsColumnFamilyDropped()) { + break; + } else if (status.IsColumnFamilyDropped()) { + status = Status::OK(); + } + } } - job_context.Clean(); - LogFlush(immutable_db_options_.info_log); - return Status::OK(); -} - -int DBImpl::IsFileDeletionsEnabled() const { - return !disable_delete_obsolete_files_; + return status; } Status DBImpl::GetLiveFiles(std::vector<std::string>& ret, @@ -82,34 +72,7 @@ mutex_.Lock(); if (flush_memtable) { - // flush all dirty data to disk.
- Status status; - if (immutable_db_options_.atomic_flush) { - autovector<ColumnFamilyData*> cfds; - SelectColumnFamiliesForAtomicFlush(&cfds); - mutex_.Unlock(); - status = AtomicFlushMemTables(cfds, FlushOptions(), - FlushReason::kGetLiveFiles); - mutex_.Lock(); - } else { - for (auto cfd : *versions_->GetColumnFamilySet()) { - if (cfd->IsDropped()) { - continue; - } - cfd->Ref(); - mutex_.Unlock(); - status = FlushMemTable(cfd, FlushOptions(), FlushReason::kGetLiveFiles); - TEST_SYNC_POINT("DBImpl::GetLiveFiles:1"); - TEST_SYNC_POINT("DBImpl::GetLiveFiles:2"); - mutex_.Lock(); - cfd->UnrefAndTryDelete(); - if (!status.ok()) { - break; - } - } - } - versions_->GetColumnFamilySet()->FreeDeadColumnFamilies(); - + Status status = FlushForGetLiveFiles(); if (!status.ok()) { mutex_.Unlock(); ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Cannot Flush data %s\n", @@ -118,27 +81,40 @@ } } - // Make a set of all of the live *.sst files - std::vector<FileDescriptor> live; + // Make a set of all of the live table and blob files + std::vector<uint64_t> live_table_files; + std::vector<uint64_t> live_blob_files; for (auto cfd : *versions_->GetColumnFamilySet()) { if (cfd->IsDropped()) { continue; } - cfd->current()->AddLiveFiles(&live); + cfd->current()->AddLiveFiles(&live_table_files, &live_blob_files); } ret.clear(); - ret.reserve(live.size() + 3); // *.sst + CURRENT + MANIFEST + OPTIONS + ret.reserve(live_table_files.size() + live_blob_files.size() + + 3); // for CURRENT + MANIFEST + OPTIONS // create names of the live files. The names are not absolute // paths, instead they are relative to dbname_; - for (const auto& live_file : live) { - ret.push_back(MakeTableFileName("", live_file.GetNumber())); + for (const auto& table_file_number : live_table_files) { + ret.emplace_back(MakeTableFileName("", table_file_number)); } - ret.push_back(CurrentFileName("")); - ret.push_back(DescriptorFileName("", versions_->manifest_file_number())); - ret.push_back(OptionsFileName("", versions_->options_file_number())); + for (const auto& blob_file_number : live_blob_files) { + ret.emplace_back(BlobFileName("", blob_file_number)); + } + + ret.emplace_back(CurrentFileName("")); + ret.emplace_back(DescriptorFileName("", versions_->manifest_file_number())); + // The OPTIONS file number is zero in read-write mode when OPTIONS file + // writing failed and the DB was configured with + // `fail_if_options_file_error == false`. In read-only mode the OPTIONS file + // number is zero when no OPTIONS file exist at all. In those cases we do not + // record any OPTIONS file in the live file list. + if (versions_->options_file_number() != 0) { + ret.emplace_back(OptionsFileName("", versions_->options_file_number())); + } // find length of manifest file while holding the mutex lock *manifest_file_size = versions_->manifest_file_size(); @@ -148,19 +124,33 @@ } Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { + // If caller disabled deletions, this function should return files that are + // guaranteed not to be deleted until deletions are re-enabled. We need to + // wait for pending purges to finish since WalManager doesn't know which + // files are going to be purged. Additional purges won't be scheduled as + // long as deletions are disabled (so the below loop must terminate). + // Also note that we disable deletions anyway to avoid the case where a + // file is deleted in the middle of the scan, causing IO error.
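+ // (Editorial sketch, not part of the patch: client code can use the same
+ // pin-then-scan idiom through the public DB API; error handling elided:
+ //
+ //   Status disabled = db->DisableFileDeletions();  // pin live files
+ //   VectorLogPtr wals;
+ //   Status scan = db->GetSortedWalFiles(wals);     // safe from deletion
+ //   if (disabled.ok()) {
+ //     // force=false keeps pins taken by other callers intact
+ //     db->EnableFileDeletions(/*force=*/false).PermitUncheckedError();
+ //   }
+ // )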
+ Status deletions_disabled = DisableFileDeletions(); { - // If caller disabled deletions, this function should return files that are - // guaranteed not to be deleted until deletions are re-enabled. We need to - // wait for pending purges to finish since WalManager doesn't know which - // files are going to be purged. Additional purges won't be scheduled as - // long as deletions are disabled (so the below loop must terminate). InstrumentedMutexLock l(&mutex_); - while (disable_delete_obsolete_files_ > 0 && - pending_purge_obsolete_files_ > 0) { + while (pending_purge_obsolete_files_ > 0 || bg_purge_scheduled_ > 0) { bg_cv_.Wait(); } } - return wal_manager_.GetSortedWalFiles(files); + + Status s = wal_manager_.GetSortedWalFiles(files); + + // DisableFileDeletions / EnableFileDeletions not supported in read-only DB + if (deletions_disabled.ok()) { + Status s2 = EnableFileDeletions(/*force*/ false); + assert(s2.ok()); + s2.PermitUncheckedError(); + } else { + assert(deletions_disabled.IsNotSupported()); + } + + return s; } Status DBImpl::GetCurrentWalFile(std::unique_ptr<LogFile>* current_log_file) { @@ -172,6 +162,245 @@ return wal_manager_.GetLiveWalFile(current_logfile_number, current_log_file); } + +Status DBImpl::GetLiveFilesStorageInfo( + const LiveFilesStorageInfoOptions& opts, + std::vector<LiveFileStorageInfo>* files) { + // To avoid returning partial results, only move to output on success + assert(files); + files->clear(); + std::vector<LiveFileStorageInfo> results; + + // NOTE: This implementation was largely migrated from Checkpoint. + + Status s; + VectorLogPtr live_wal_files; + bool flush_memtable = true; + if (!immutable_db_options_.allow_2pc) { + if (opts.wal_size_for_flush == port::kMaxUint64) { + flush_memtable = false; + } else if (opts.wal_size_for_flush > 0) { + // If outstanding log files are small, we skip the flush. + s = GetSortedWalFiles(live_wal_files); + + if (!s.ok()) { + return s; + } + + // Don't flush column families if total log size is smaller than + // log_size_for_flush. We copy the log files instead. + // We may be able to cover 2PC case too. + uint64_t total_wal_size = 0; + for (auto& wal : live_wal_files) { + total_wal_size += wal->SizeFileBytes(); + } + if (total_wal_size < opts.wal_size_for_flush) { + flush_memtable = false; + } + live_wal_files.clear(); + } + } + + // This is a modified version of GetLiveFiles, to get access to more + // metadata.
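+ // (Editorial worked example of the wal_size_for_flush decision above,
+ // assuming three live WAL files of 4 KB, 16 KB and 12 KB, i.e.
+ // total_wal_size = 32768 bytes; the sizes are hypothetical:
+ //   wal_size_for_flush == 0                 -> flush_memtable stays true
+ //   wal_size_for_flush == port::kMaxUint64  -> never flush; copy the WALs
+ //   wal_size_for_flush == 65536             -> 32768 < 65536: skip the
+ //                                              flush and copy the WALs
+ //   wal_size_for_flush == 16384             -> 32768 >= 16384: flush
+ // )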
+ mutex_.Lock(); + if (flush_memtable) { + Status status = FlushForGetLiveFiles(); + if (!status.ok()) { + mutex_.Unlock(); + ROCKS_LOG_ERROR(immutable_db_options_.info_log, "Cannot Flush data %s\n", + status.ToString().c_str()); + return status; + } + } + + // Make a set of all of the live table and blob files + for (auto cfd : *versions_->GetColumnFamilySet()) { + if (cfd->IsDropped()) { + continue; + } + VersionStorageInfo& vsi = *cfd->current()->storage_info(); + auto& cf_paths = cfd->ioptions()->cf_paths; + + auto GetDir = [&](size_t path_id) { + // Matching TableFileName() behavior + if (path_id >= cf_paths.size()) { + assert(false); + return cf_paths.back().path; + } else { + return cf_paths[path_id].path; + } + }; + + for (int level = 0; level < vsi.num_levels(); ++level) { + const auto& level_files = vsi.LevelFiles(level); + for (const auto& meta : level_files) { + assert(meta); + + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = MakeTableFileName(meta->fd.GetNumber()); + info.directory = GetDir(meta->fd.GetPathId()); + info.file_number = meta->fd.GetNumber(); + info.file_type = kTableFile; + info.size = meta->fd.GetFileSize(); + if (opts.include_checksum_info) { + info.file_checksum_func_name = meta->file_checksum_func_name; + info.file_checksum = meta->file_checksum; + if (info.file_checksum_func_name.empty()) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + info.temperature = meta->temperature; + } + } + const auto& blob_files = vsi.GetBlobFiles(); + for (const auto& pair : blob_files) { + const auto& meta = pair.second; + assert(meta); + + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = BlobFileName(meta->GetBlobFileNumber()); + info.directory = GetName(); // TODO?: support db_paths/cf_paths + info.file_number = meta->GetBlobFileNumber(); + info.file_type = kBlobFile; + info.size = meta->GetBlobFileSize(); + if (opts.include_checksum_info) { + info.file_checksum_func_name = meta->GetChecksumMethod(); + info.file_checksum = meta->GetChecksumValue(); + if (info.file_checksum_func_name.empty()) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + // TODO?: info.temperature + } + } + + // Capture some final info before releasing mutex + const uint64_t manifest_number = versions_->manifest_file_number(); + const uint64_t manifest_size = versions_->manifest_file_size(); + const uint64_t options_number = versions_->options_file_number(); + const uint64_t options_size = versions_->options_file_size_; + const uint64_t min_log_num = MinLogNumberToKeep(); + + mutex_.Unlock(); + + std::string manifest_fname = DescriptorFileName(manifest_number); + { // MANIFEST + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = manifest_fname; + info.directory = GetName(); + info.file_number = manifest_number; + info.file_type = kDescriptorFile; + info.size = manifest_size; + info.trim_to_size = true; + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + + { // CURRENT + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = kCurrentFileName; + info.directory = GetName(); + info.file_type = kCurrentFile; + // CURRENT could be replaced so we have to record the contents we want + 
// for it + info.replacement_contents = manifest_fname + "\n"; + info.size = manifest_fname.size() + 1; + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + + // The OPTIONS file number is zero in read-write mode when OPTIONS file + // writing failed and the DB was configured with + // `fail_if_options_file_error == false`. In read-only mode the OPTIONS file + // number is zero when no OPTIONS file exist at all. In those cases we do not + // record any OPTIONS file in the live file list. + if (options_number != 0) { + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + + info.relative_filename = OptionsFileName(options_number); + info.directory = GetName(); + info.file_number = options_number; + info.file_type = kOptionsFile; + info.size = options_size; + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + + // Some legacy testing stuff TODO: carefully clean up obsolete parts + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:FlushDone"); + + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles1"); + TEST_SYNC_POINT("CheckpointImpl::CreateCheckpoint:SavedLiveFiles2"); + + if (s.ok()) { + s = FlushWAL(false /* sync */); + } + + TEST_SYNC_POINT("CheckpointImpl::CreateCustomCheckpoint:AfterGetLive1"); + TEST_SYNC_POINT("CheckpointImpl::CreateCustomCheckpoint:AfterGetLive2"); + + // if we have more than one column family, we need to also get WAL files + if (s.ok()) { + s = GetSortedWalFiles(live_wal_files); + } + if (!s.ok()) { + return s; + } + + size_t wal_size = live_wal_files.size(); + + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "Number of log files %" ROCKSDB_PRIszt, live_wal_files.size()); + + // Link WAL files. Copy exact size of last one because it is the only one + // that has changes after the last flush. + auto wal_dir = immutable_db_options_.GetWalDir(); + for (size_t i = 0; s.ok() && i < wal_size; ++i) { + if ((live_wal_files[i]->Type() == kAliveLogFile) && + (!flush_memtable || live_wal_files[i]->LogNumber() >= min_log_num)) { + results.emplace_back(); + LiveFileStorageInfo& info = results.back(); + auto f = live_wal_files[i]->PathName(); + assert(!f.empty() && f[0] == '/'); + info.relative_filename = f.substr(1); + info.directory = wal_dir; + info.file_number = live_wal_files[i]->LogNumber(); + info.file_type = kWalFile; + info.size = live_wal_files[i]->SizeFileBytes(); + // Only last should need to be trimmed + info.trim_to_size = (i + 1 == wal_size); + if (opts.include_checksum_info) { + info.file_checksum_func_name = kUnknownFileChecksumFuncName; + info.file_checksum = kUnknownFileChecksum; + } + } + } + + if (s.ok()) { + // Only move output on success + *files = std::move(results); + } + return s; +} + } // namespace ROCKSDB_NAMESPACE #endif // ROCKSDB_LITE diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_flush_test.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_flush_test.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_flush_test.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_flush_test.cc 2025-05-19 16:14:27.000000000 +0000 @@ -8,21 +8,31 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include +#include #include "db/db_impl/db_impl.h" #include "db/db_test_util.h" +#include "env/mock_env.h" +#include "file/filename.h" #include "port/port.h" #include "port/stack_trace.h" -#include "test_util/fault_injection_test_env.h" +#include "rocksdb/utilities/transaction_db.h" #include "test_util/sync_point.h" +#include "test_util/testutil.h" #include "util/cast_util.h" #include "util/mutexlock.h" +#include "utilities/fault_injection_env.h" +#include "utilities/fault_injection_fs.h" namespace ROCKSDB_NAMESPACE { +// This is a static filter used for filtering +// kvs during the compaction process. +static std::string NEW_VALUE = "NewValue"; + class DBFlushTest : public DBTestBase { public: - DBFlushTest() : DBTestBase("/db_flush_test") {} + DBFlushTest() : DBTestBase("db_flush_test", /*env_do_fsync=*/true) {} }; class DBFlushDirectIOTest : public DBFlushTest, @@ -62,7 +72,7 @@ ASSERT_OK(Put("bar", "v")); ASSERT_OK(dbfull()->Flush(no_wait)); // If the issue is hit we will wait here forever. - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); #ifndef ROCKSDB_LITE ASSERT_EQ(2, TotalTableFiles()); #endif // ROCKSDB_LITE @@ -78,41 +88,26 @@ options.env = fault_injection_env.get(); SyncPoint::GetInstance()->LoadDependency( - {{"DBFlushTest::SyncFail:GetVersionRefCount:1", - "DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"}, - {"DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", - "DBFlushTest::SyncFail:GetVersionRefCount:2"}, - {"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"}, + {{"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"}, {"DBImpl::SyncClosedLogs:Failed", "DBFlushTest::SyncFail:2"}}); SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu"}, options); - Put("key", "value"); - auto* cfd = - reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily()) - ->cfd(); + ASSERT_OK(Put("key", "value")); FlushOptions flush_options; flush_options.wait = false; ASSERT_OK(dbfull()->Flush(flush_options)); // Flush installs a new super-version. Get the ref count after that. - auto current_before = cfd->current(); - int refs_before = cfd->current()->TEST_refs(); - TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:1"); - TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:2"); - int refs_after_picking_memtables = cfd->current()->TEST_refs(); - ASSERT_EQ(refs_before + 1, refs_after_picking_memtables); fault_injection_env->SetFilesystemActive(false); TEST_SYNC_POINT("DBFlushTest::SyncFail:1"); TEST_SYNC_POINT("DBFlushTest::SyncFail:2"); fault_injection_env->SetFilesystemActive(true); // Now the background job will do the flush; wait for it. - dbfull()->TEST_WaitForFlushMemTable(); + // Returns the IO error happened during flush. + ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable()); #ifndef ROCKSDB_LITE ASSERT_EQ("", FilesPerLevel()); // flush failed. #endif // ROCKSDB_LITE - // Backgroun flush job should release ref count to current version. - ASSERT_EQ(current_before, cfd->current()); - ASSERT_EQ(refs_before, cfd->current()->TEST_refs()); Destroy(options); } @@ -125,7 +120,7 @@ SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); - Put("key", "value"); + ASSERT_OK(Put("key", "value")); FlushOptions flush_options; flush_options.wait = false; @@ -135,7 +130,7 @@ TEST_SYNC_POINT("DBFlushTest::SyncSkip:2"); // Now the background job will do the flush; wait for it.
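// (Editorial aside, not from the upstream test: the FaultInjectionTestEnv
// pattern used by SyncFail above is, in outline:
//
//   auto fault_env =
//       std::make_shared<FaultInjectionTestEnv>(Env::Default());
//   options.env = fault_env.get();
//   fault_env->SetFilesystemActive(false);  // subsequent writes/syncs fail
//   ...exercise the I/O that is expected to fail...
//   fault_env->SetFilesystemActive(true);   // restore normal operation
// )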
- dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); Destroy(options); } @@ -145,7 +140,7 @@ // scheduled in the low-pri (compaction) thread pool. Options options = CurrentOptions(); options.level0_file_num_compaction_trigger = 4; - options.memtable_factory.reset(new SpecialSkipListFactory(1)); + options.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); Reopen(options); env_->SetBackgroundThreads(0, Env::HIGH); @@ -170,13 +165,73 @@ ASSERT_OK(Put("key", "val")); for (int i = 0; i < 4; ++i) { ASSERT_OK(Put("key", "val")); - dbfull()->TEST_WaitForFlushMemTable(); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); } - dbfull()->TEST_WaitForCompact(); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(4, num_flushes); ASSERT_EQ(1, num_compactions); } +// Test that when a flush job is submitted to the low-priority thread pool and +// the DB is closed in the meanwhile, CloseHelper doesn't hang. +TEST_F(DBFlushTest, CloseDBWhenFlushInLowPri) { + Options options = CurrentOptions(); + options.max_background_flushes = 1; + options.max_total_wal_size = 8192; + + DestroyAndReopen(options); + CreateColumnFamilies({"cf1", "cf2"}, options); + + env_->SetBackgroundThreads(0, Env::HIGH); + env_->SetBackgroundThreads(1, Env::LOW); + test::SleepingBackgroundTask sleeping_task_low; + int num_flushes = 0; + + SyncPoint::GetInstance()->SetCallBack("DBImpl::BGWorkFlush", + [&](void* /*arg*/) { ++num_flushes; }); + + int num_low_flush_unscheduled = 0; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::UnscheduleLowFlushCallback", [&](void* /*arg*/) { + num_low_flush_unscheduled++; + // There should be one flush job in the low pool that needs to be + // unscheduled + ASSERT_EQ(num_low_flush_unscheduled, 1); + }); + + int num_high_flush_unscheduled = 0; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::UnscheduleHighFlushCallback", [&](void* /*arg*/) { + num_high_flush_unscheduled++; + // There should be no flush job in the high pool + ASSERT_EQ(num_high_flush_unscheduled, 0); + }); + + SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_OK(Put(0, "key1", DummyString(8192))); + // Block the thread so that the flush cannot run and can be removed from the + // queue when Unschedule is called. + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low, + Env::Priority::LOW); + sleeping_task_low.WaitUntilSleeping(); + + // Trigger a flush; the flush job will be scheduled on the LOW priority + // thread. + ASSERT_OK(Put(0, "key2", DummyString(8192))); + + // Close the DB; the flush job in the low-priority queue will be removed + // without running. + Close(); + sleeping_task_low.WakeUp(); + sleeping_task_low.WaitUntilDone(); + ASSERT_EQ(0, num_flushes); + + TryReopenWithColumnFamilies({"default", "cf1", "cf2"}, options); + ASSERT_OK(Put(0, "key3", DummyString(8192))); + ASSERT_OK(Flush(0)); + ASSERT_EQ(1, num_flushes); +} + TEST_F(DBFlushTest, ManualFlushWithMinWriteBufferNumberToMerge) { Options options = CurrentOptions(); options.write_buffer_size = 100; @@ -236,13 +291,1096 @@ SyncPoint::GetInstance()->ClearAllCallBacks(); } +// The following 3 tests are designed for testing garbage statistics at flush +// time. +// +// ======= General Information ======= (from GitHub Wiki). +// There are three scenarios where memtable flush can be triggered: +// +// 1 - Memtable size exceeds ColumnFamilyOptions::write_buffer_size +// after a write.
+// 2 - Total memtable size across all column families exceeds +// DBOptions::db_write_buffer_size, +// or DBOptions::write_buffer_manager signals a flush. In this scenario +// the largest memtable will be flushed. +// 3 - Total WAL file size exceeds DBOptions::max_total_wal_size. +// In this scenario the memtable with the oldest data will be flushed, +// in order to allow the WAL file with data from this memtable to be +// purged. +// +// As a result, a memtable can be flushed before it is full. This is one +// reason the generated SST file can be smaller than the corresponding +// memtable. Compression is another factor to make SST file smaller than +// corresponding memtable, since data in memtable is uncompressed. + +TEST_F(DBFlushTest, StatisticsGarbageBasic) { + Options options = CurrentOptions(); + + // The following options are used to enforce several values that + // may already exist as default values to make this test resilient + // to default value updates in the future. + options.statistics = CreateDBStatistics(); + + // Record all statistics. + options.statistics->set_stats_level(StatsLevel::kAll); + + // create the DB if it's not already present + options.create_if_missing = true; + + // Useful for now as we are trying to compare uncompressed data savings on + // flush(). + options.compression = kNoCompression; + + // Prevent memtable in place updates. Should already be disabled + // (from Wiki: + // In place updates can be enabled by toggling on the bool + // inplace_update_support flag. However, this flag is by default set to + // false + // because this thread-safe in-place update support is not compatible + // with concurrent memtable writes. Note that the bool + // allow_concurrent_memtable_write is set to true by default ) + options.inplace_update_support = false; + options.allow_concurrent_memtable_write = true; + + // Enforce size of a single MemTable to 64MB (64MB = 67108864 bytes). + options.write_buffer_size = 64 << 20; + + ASSERT_OK(TryReopen(options)); + + // Put multiple times the same key-values. + // The encoded length of a db entry in the memtable is + // defined in db/memtable.cc (MemTable::Add) as the variable: + // encoded_len= VarintLength(internal_key_size) --> = + // log_256(internal_key). + // Min # of bytes + // necessary to + // store + // internal_key_size. + // + internal_key_size --> = actual key string, + // (size key_size: w/o term null char) + // + 8 bytes for + // fixed uint64 "seq + // number + // + + // insertion type" + // + VarintLength(val_size) --> = min # of bytes to + // store val_size + // + val_size --> = actual value + // string + // For example, in our situation, "key1" : size 4, "value1" : size 6 + // (the terminating null characters are not copied over to the memtable). + // And therefore encoded_len = 1 + (4+8) + 1 + 6 = 20 bytes per entry. + // However in terms of raw data contained in the memtable, and written + // over to the SSTable, we only count internal_key_size and val_size, + // because this is the only raw chunk of bytes that contains everything + // necessary to reconstruct a user entry: sequence number, insertion type, + // key, and value. + + // To test the relevance of our Memtable garbage statistics, + // namely MEMTABLE_PAYLOAD_BYTES_AT_FLUSH and MEMTABLE_GARBAGE_BYTES_AT_FLUSH, + // we insert K-V pairs with 3 distinct keys (of length 4), + // and random values of arbitrary length RAND_VALUES_LENGTH, + // and we repeat this step NUM_REPEAT times total. 
+ // At the end, we insert 3 final K-V pairs with the same 3 keys + // and known values (these will be the final values, of length 6). + // I chose NUM_REPEAT=2,000 such that no automatic flush is + // triggered (the number of bytes in the memtable is therefore + // well below any meaningful heuristic for a memtable of size 64MB). + // As a result, since each K-V pair is inserted as a payload + // of N meaningful bytes (sequence number, insertion type, + // key, and value = 8 + 4 + RAND_VALUES_LENGTH), and the loop + // writes 3 such pairs per iteration, + // MEMTABLE_GARBAGE_BYTES_AT_FLUSH should be equal to 3 * 2,000 * N bytes + // and MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = MEMTABLE_GARBAGE_BYTES_AT_FLUSH + + // (3*(8 + 4 + 6)) bytes. For RAND_VALUES_LENGTH = 172 (arbitrary value), we + // expect: + // N = 8 + 4 + 172 = 184 bytes + // MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 3 * 2,000 * 184 = 1,104,000 bytes. + // MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 1,104,000 + 3*18 = 1,104,054 bytes. + + const size_t NUM_REPEAT = 2000; + const size_t RAND_VALUES_LENGTH = 172; + const std::string KEY1 = "key1"; + const std::string KEY2 = "key2"; + const std::string KEY3 = "key3"; + const std::string VALUE1 = "value1"; + const std::string VALUE2 = "value2"; + const std::string VALUE3 = "value3"; + uint64_t EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 0; + uint64_t EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 0; + + Random rnd(301); + // Insertion of K-V pairs, multiple times. + for (size_t i = 0; i < NUM_REPEAT; i++) { + // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. + std::string p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); + std::string p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); + std::string p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); + ASSERT_OK(Put(KEY1, p_v1)); + ASSERT_OK(Put(KEY2, p_v2)); + ASSERT_OK(Put(KEY3, p_v3)); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY1.size() + p_v1.size() + sizeof(uint64_t); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY2.size() + p_v2.size() + sizeof(uint64_t); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY3.size() + p_v3.size() + sizeof(uint64_t); + } + + // The memtable data bytes include the "garbage" + // bytes along with the useful payload. + EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH; + + ASSERT_OK(Put(KEY1, VALUE1)); + ASSERT_OK(Put(KEY2, VALUE2)); + ASSERT_OK(Put(KEY3, VALUE3)); + + // Add useful payload to the memtable data bytes: + EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH += + KEY1.size() + VALUE1.size() + KEY2.size() + VALUE2.size() + KEY3.size() + + VALUE3.size() + 3 * sizeof(uint64_t); + + // We assert that the last K-V pairs have been successfully inserted, + // and that the valid values are VALUE1, VALUE2, VALUE3. + PinnableSlice value; + ASSERT_OK(Get(KEY1, &value)); + ASSERT_EQ(value.ToString(), VALUE1); + ASSERT_OK(Get(KEY2, &value)); + ASSERT_EQ(value.ToString(), VALUE2); + ASSERT_OK(Get(KEY3, &value)); + ASSERT_EQ(value.ToString(), VALUE3); + + // Force flush to SST. Increments the statistics counter. + ASSERT_OK(Flush()); + + // Collect statistics.
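+ // (Editorial worked check of the totals: each overwritten entry
+ // contributes 4 (key) + 172 (value) + 8 (seq + type) = 184 garbage bytes,
+ // and the loop writes 3 such entries per iteration, so
+ //   garbage = 3 * 2,000 * 184    = 1,104,000 bytes
+ //   payload = 1,104,000 + 3 * 18 = 1,104,054 bytes,
+ // matching the EXPECTED_* accumulators computed in the loop above.)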
+ uint64_t mem_data_bytes = + TestGetTickerCount(options, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); + uint64_t mem_garbage_bytes = + TestGetTickerCount(options, MEMTABLE_GARBAGE_BYTES_AT_FLUSH); + + EXPECT_EQ(mem_data_bytes, EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); + EXPECT_EQ(mem_garbage_bytes, EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH); + + Close(); +} + +TEST_F(DBFlushTest, StatisticsGarbageInsertAndDeletes) { + Options options = CurrentOptions(); + options.statistics = CreateDBStatistics(); + options.statistics->set_stats_level(StatsLevel::kAll); + options.create_if_missing = true; + options.compression = kNoCompression; + options.inplace_update_support = false; + options.allow_concurrent_memtable_write = true; + options.write_buffer_size = 67108864; + + ASSERT_OK(TryReopen(options)); + + const size_t NUM_REPEAT = 2000; + const size_t RAND_VALUES_LENGTH = 37; + const std::string KEY1 = "key1"; + const std::string KEY2 = "key2"; + const std::string KEY3 = "key3"; + const std::string KEY4 = "key4"; + const std::string KEY5 = "key5"; + const std::string KEY6 = "key6"; + + uint64_t EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 0; + uint64_t EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 0; + + WriteBatch batch; + + Random rnd(301); + // Insertion of K-V pairs, multiple times. + for (size_t i = 0; i < NUM_REPEAT; i++) { + // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. + std::string p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); + std::string p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); + std::string p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); + ASSERT_OK(Put(KEY1, p_v1)); + ASSERT_OK(Put(KEY2, p_v2)); + ASSERT_OK(Put(KEY3, p_v3)); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY1.size() + p_v1.size() + sizeof(uint64_t); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY2.size() + p_v2.size() + sizeof(uint64_t); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY3.size() + p_v3.size() + sizeof(uint64_t); + ASSERT_OK(Delete(KEY1)); + ASSERT_OK(Delete(KEY2)); + ASSERT_OK(Delete(KEY3)); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY1.size() + KEY2.size() + KEY3.size() + 3 * sizeof(uint64_t); + } + + // The memtable data bytes include the "garbage" + // bytes along with the useful payload. + EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH; + + // Note : one set of delete for KEY1, KEY2, KEY3 is written to + // SSTable to propagate the delete operations to K-V pairs + // that could have been inserted into the database during past Flush + // operations. + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH -= + KEY1.size() + KEY2.size() + KEY3.size() + 3 * sizeof(uint64_t); + + // Additional useful payload. + ASSERT_OK(Delete(KEY4)); + ASSERT_OK(Delete(KEY5)); + ASSERT_OK(Delete(KEY6)); + + // Add useful payload to the memtable data bytes: + EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH += + KEY4.size() + KEY5.size() + KEY6.size() + 3 * sizeof(uint64_t); + + // We assert that the K-V pairs have been successfully deleted. + PinnableSlice value; + ASSERT_NOK(Get(KEY1, &value)); + ASSERT_NOK(Get(KEY2, &value)); + ASSERT_NOK(Get(KEY3, &value)); + + // Force flush to SST. Increments the statistics counter. + ASSERT_OK(Flush()); + + // Collect statistics.
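+ // (Editorial worked check: per iteration the three Puts contribute
+ // 3 * (4 + 37 + 8) = 147 garbage bytes and the three Deletes
+ // 3 * (4 + 8) = 36, so after 2,000 iterations both counters reach
+ // 2,000 * 183 = 366,000 bytes; the one set of deletes that stays useful
+ // moves 36 bytes out of garbage (365,964), and the three extra Deletes
+ // add 36 bytes of payload (366,036), matching the EXPECTED_* math above.)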
+ uint64_t mem_data_bytes = + TestGetTickerCount(options, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); + uint64_t mem_garbage_bytes = + TestGetTickerCount(options, MEMTABLE_GARBAGE_BYTES_AT_FLUSH); + + EXPECT_EQ(mem_data_bytes, EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH); + EXPECT_EQ(mem_garbage_bytes, EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH); + + Close(); +} + +TEST_F(DBFlushTest, StatisticsGarbageRangeDeletes) { + Options options = CurrentOptions(); + options.statistics = CreateDBStatistics(); + options.statistics->set_stats_level(StatsLevel::kAll); + options.create_if_missing = true; + options.compression = kNoCompression; + options.inplace_update_support = false; + options.allow_concurrent_memtable_write = true; + options.write_buffer_size = 67108864; + + ASSERT_OK(TryReopen(options)); + + const size_t NUM_REPEAT = 1000; + const size_t RAND_VALUES_LENGTH = 42; + const std::string KEY1 = "key1"; + const std::string KEY2 = "key2"; + const std::string KEY3 = "key3"; + const std::string KEY4 = "key4"; + const std::string KEY5 = "key5"; + const std::string KEY6 = "key6"; + const std::string VALUE3 = "value3"; + + uint64_t EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = 0; + uint64_t EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH = 0; + + Random rnd(301); + // Insertion of K-V pairs, multiple times. + // Also insert DeleteRange + for (size_t i = 0; i < NUM_REPEAT; i++) { + // Create value strings of arbitrary length RAND_VALUES_LENGTH bytes. + std::string p_v1 = rnd.RandomString(RAND_VALUES_LENGTH); + std::string p_v2 = rnd.RandomString(RAND_VALUES_LENGTH); + std::string p_v3 = rnd.RandomString(RAND_VALUES_LENGTH); + ASSERT_OK(Put(KEY1, p_v1)); + ASSERT_OK(Put(KEY2, p_v2)); + ASSERT_OK(Put(KEY3, p_v3)); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY1.size() + p_v1.size() + sizeof(uint64_t); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY2.size() + p_v2.size() + sizeof(uint64_t); + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + KEY3.size() + p_v3.size() + sizeof(uint64_t); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY1, + KEY2)); + // Note: DeleteRange has an exclusive upper bound, e.g. here: [KEY2,KEY3) + // is deleted. + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY2, + KEY3)); + // Delete ranges are stored as a regular K-V pair, with key=STARTKEY, + // value=ENDKEY. + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH += + (KEY1.size() + KEY2.size() + sizeof(uint64_t)) + + (KEY2.size() + KEY3.size() + sizeof(uint64_t)); + } + + // The memtable data bytes include the "garbage" + // bytes along with the useful payload. + EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH = + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH; + + // Note : one set of deleteRange for (KEY1, KEY2) and (KEY2, KEY3) is written + // to SSTable to propagate the deleteRange operations to K-V pairs that could + // have been inserted into the database during past Flush operations. + EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH -= + (KEY1.size() + KEY2.size() + sizeof(uint64_t)) + + (KEY2.size() + KEY3.size() + sizeof(uint64_t)); + + // Overwrite KEY3 with known value (VALUE3) + // Note that during the whole time KEY3 has never been deleted + // by the RangeDeletes. + ASSERT_OK(Put(KEY3, VALUE3)); + EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH += + KEY3.size() + VALUE3.size() + sizeof(uint64_t); + + // Additional useful payload.
+  // Additional useful payload.
+  ASSERT_OK(
+      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY4, KEY5));
+  ASSERT_OK(
+      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY5, KEY6));
+
+  // Add useful payload to the memtable data bytes:
+  EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH +=
+      (KEY4.size() + KEY5.size() + sizeof(uint64_t)) +
+      (KEY5.size() + KEY6.size() + sizeof(uint64_t));
+
+  // We assert that the K-V pairs have been successfully deleted.
+  PinnableSlice value;
+  ASSERT_NOK(Get(KEY1, &value));
+  ASSERT_NOK(Get(KEY2, &value));
+  // And that KEY3's value is correct.
+  ASSERT_OK(Get(KEY3, &value));
+  ASSERT_EQ(value, VALUE3);
+
+  // Force flush to SST. Increments the statistics counter.
+  ASSERT_OK(Flush());
+
+  // Collect statistics.
+  uint64_t mem_data_bytes =
+      TestGetTickerCount(options, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH);
+  uint64_t mem_garbage_bytes =
+      TestGetTickerCount(options, MEMTABLE_GARBAGE_BYTES_AT_FLUSH);
+
+  EXPECT_EQ(mem_data_bytes, EXPECTED_MEMTABLE_PAYLOAD_BYTES_AT_FLUSH);
+  EXPECT_EQ(mem_garbage_bytes, EXPECTED_MEMTABLE_GARBAGE_BYTES_AT_FLUSH);
+
+  Close();
+}
+
+#ifndef ROCKSDB_LITE
+// This simple Listener can only handle one flush at a time.
+class TestFlushListener : public EventListener {
+ public:
+  TestFlushListener(Env* env, DBFlushTest* test)
+      : slowdown_count(0), stop_count(0), db_closed(), env_(env), test_(test) {
+    db_closed = false;
+  }
+
+  ~TestFlushListener() override {
+    prev_fc_info_.status.PermitUncheckedError();  // Ignore the status
+  }
+
+  void OnTableFileCreated(const TableFileCreationInfo& info) override {
+    // remember the info for later checking the FlushJobInfo.
+    prev_fc_info_ = info;
+    ASSERT_GT(info.db_name.size(), 0U);
+    ASSERT_GT(info.cf_name.size(), 0U);
+    ASSERT_GT(info.file_path.size(), 0U);
+    ASSERT_GT(info.job_id, 0);
+    ASSERT_GT(info.table_properties.data_size, 0U);
+    ASSERT_GT(info.table_properties.raw_key_size, 0U);
+    ASSERT_GT(info.table_properties.raw_value_size, 0U);
+    ASSERT_GT(info.table_properties.num_data_blocks, 0U);
+    ASSERT_GT(info.table_properties.num_entries, 0U);
+    ASSERT_EQ(info.file_checksum, kUnknownFileChecksum);
+    ASSERT_EQ(info.file_checksum_func_name, kUnknownFileChecksumFuncName);
+  }
+
+  void OnFlushCompleted(DB* db, const FlushJobInfo& info) override {
+    flushed_dbs_.push_back(db);
+    flushed_column_family_names_.push_back(info.cf_name);
+    if (info.triggered_writes_slowdown) {
+      slowdown_count++;
+    }
+    if (info.triggered_writes_stop) {
+      stop_count++;
+    }
+    // verify whether the previously created file matches the flushed file.
+    ASSERT_EQ(prev_fc_info_.db_name, db->GetName());
+    ASSERT_EQ(prev_fc_info_.cf_name, info.cf_name);
+    ASSERT_EQ(prev_fc_info_.job_id, info.job_id);
+    ASSERT_EQ(prev_fc_info_.file_path, info.file_path);
+    ASSERT_EQ(TableFileNameToNumber(info.file_path), info.file_number);
+
+    // Note: the following chunk relies on the notification pertaining to the
+    // database pointed to by DBTestBase::db_, and is thus bypassed when
+    // that assumption does not hold (see the test case MultiDBMultiListeners
+    // below).
+    ASSERT_TRUE(test_);
+    if (db == test_->db_) {
+      std::vector<std::vector<FileMetaData>> files_by_level;
+      test_->dbfull()->TEST_GetFilesMetaData(db->DefaultColumnFamily(),
+                                             &files_by_level);
+
+      ASSERT_FALSE(files_by_level.empty());
+      auto it = std::find_if(files_by_level[0].begin(), files_by_level[0].end(),
+                             [&](const FileMetaData& meta) {
+                               return meta.fd.GetNumber() == info.file_number;
+                             });
+      ASSERT_NE(it, files_by_level[0].end());
+      ASSERT_EQ(info.oldest_blob_file_number, it->oldest_blob_file_number);
+    }
+
+    ASSERT_EQ(db->GetEnv()->GetThreadID(), info.thread_id);
+    ASSERT_GT(info.thread_id, 0U);
+  }
+
+  std::vector<std::string> flushed_column_family_names_;
+  std::vector<DB*> flushed_dbs_;
+  int slowdown_count;
+  int stop_count;
+  bool db_closing;
+  std::atomic_bool db_closed;
+  TableFileCreationInfo prev_fc_info_;
+
+ protected:
+  Env* env_;
+  DBFlushTest* test_;
+};
+#endif // !ROCKSDB_LITE
+
+TEST_F(DBFlushTest, MemPurgeBasic) {
+  Options options = CurrentOptions();
+
+  // The following options are used to enforce several values that
+  // may already exist as default values to make this test resilient
+  // to default value updates in the future.
+  options.statistics = CreateDBStatistics();
+
+  // Record all statistics.
+  options.statistics->set_stats_level(StatsLevel::kAll);
+
+  // create the DB if it's not already present
+  options.create_if_missing = true;
+
+  // Useful for now as we are trying to compare uncompressed data savings on
+  // flush().
+  options.compression = kNoCompression;
+
+  // Prevent memtable in place updates. Should already be disabled
+  // (from Wiki:
+  //  In place updates can be enabled by toggling on the bool
+  //  inplace_update_support flag. However, this flag is by default set to
+  //  false because this thread-safe in-place update support is not compatible
+  //  with concurrent memtable writes. Note that the bool
+  //  allow_concurrent_memtable_write is set to true by default)
+  options.inplace_update_support = false;
+  options.allow_concurrent_memtable_write = true;
+
+  // Enforce size of a single MemTable to 1MB (2^20 bytes).
+  options.write_buffer_size = 1 << 20;
+  // Activate the MemPurge prototype.
+  options.experimental_mempurge_threshold = 1.0;
+#ifndef ROCKSDB_LITE
+  TestFlushListener* listener = new TestFlushListener(options.env, this);
+  options.listeners.emplace_back(listener);
+#endif // !ROCKSDB_LITE
+  ASSERT_OK(TryReopen(options));
+  std::atomic<uint32_t> mempurge_count{0};
+  std::atomic<uint32_t> sst_count{0};
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushJob:MemPurgeSuccessful",
+      [&](void* /*arg*/) { mempurge_count++; });
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; });
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+  std::string KEY1 = "IamKey1";
+  std::string KEY2 = "IamKey2";
+  std::string KEY3 = "IamKey3";
+  std::string KEY4 = "IamKey4";
+  std::string KEY5 = "IamKey5";
+  std::string KEY6 = "IamKey6";
+  std::string KEY7 = "IamKey7";
+  std::string KEY8 = "IamKey8";
+  std::string KEY9 = "IamKey9";
+  std::string RNDKEY1, RNDKEY2, RNDKEY3;
+  const std::string NOT_FOUND = "NOT_FOUND";
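Outside of the test harness, the mempurge feature exercised below is driven by ordinary Options fields. A minimal sketch of opting in (the path is illustrative, and 1.0 is simply the threshold these tests use, not a tuning recommendation):

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    rocksdb::Options opts;
    opts.create_if_missing = true;
    // Small memtable so flushes (and thus mempurge attempts) happen quickly.
    opts.write_buffer_size = 1 << 20;
    // Opt in to the experimental in-memory flush (mempurge).
    opts.experimental_mempurge_threshold = 1.0;
    rocksdb::DB* db = nullptr;
    rocksdb::Status s = rocksdb::DB::Open(opts, "/tmp/mempurge_demo", &db);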
+  // Heavy overwrite workload,
+  // more than would fit in maximum allowed memtables.
+  Random rnd(719);
+  const size_t NUM_REPEAT = 100;
+  const size_t RAND_KEYS_LENGTH = 57;
+  const size_t RAND_VALUES_LENGTH = 10240;
+  std::string p_v1, p_v2, p_v3, p_v4, p_v5, p_v6, p_v7, p_v8, p_v9, p_rv1,
+      p_rv2, p_rv3;
+
+  // Insert a very first set of keys that will be
+  // mempurged at least once.
+  p_v1 = rnd.RandomString(RAND_VALUES_LENGTH);
+  p_v2 = rnd.RandomString(RAND_VALUES_LENGTH);
+  p_v3 = rnd.RandomString(RAND_VALUES_LENGTH);
+  p_v4 = rnd.RandomString(RAND_VALUES_LENGTH);
+  ASSERT_OK(Put(KEY1, p_v1));
+  ASSERT_OK(Put(KEY2, p_v2));
+  ASSERT_OK(Put(KEY3, p_v3));
+  ASSERT_OK(Put(KEY4, p_v4));
+  ASSERT_EQ(Get(KEY1), p_v1);
+  ASSERT_EQ(Get(KEY2), p_v2);
+  ASSERT_EQ(Get(KEY3), p_v3);
+  ASSERT_EQ(Get(KEY4), p_v4);
+
+  // Insertion of K-V pairs, multiple times (overwrites).
+  for (size_t i = 0; i < NUM_REPEAT; i++) {
+    // Create random value strings of RAND_VALUES_LENGTH bytes.
+    p_v5 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v6 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v7 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v8 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v9 = rnd.RandomString(RAND_VALUES_LENGTH);
+
+    ASSERT_OK(Put(KEY5, p_v5));
+    ASSERT_OK(Put(KEY6, p_v6));
+    ASSERT_OK(Put(KEY7, p_v7));
+    ASSERT_OK(Put(KEY8, p_v8));
+    ASSERT_OK(Put(KEY9, p_v9));
+
+    ASSERT_EQ(Get(KEY1), p_v1);
+    ASSERT_EQ(Get(KEY2), p_v2);
+    ASSERT_EQ(Get(KEY3), p_v3);
+    ASSERT_EQ(Get(KEY4), p_v4);
+    ASSERT_EQ(Get(KEY5), p_v5);
+    ASSERT_EQ(Get(KEY6), p_v6);
+    ASSERT_EQ(Get(KEY7), p_v7);
+    ASSERT_EQ(Get(KEY8), p_v8);
+    ASSERT_EQ(Get(KEY9), p_v9);
+  }
+
+  // Check that there was at least one mempurge.
+  const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1;
+  // Check that no SST files were created during flush.
+  const uint32_t EXPECTED_SST_COUNT = 0;
+
+  EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT);
+  EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT);
+
+  // Insertion of K-V pairs, no overwrites.
+  for (size_t i = 0; i < NUM_REPEAT; i++) {
+    // Create random key strings of RAND_KEYS_LENGTH bytes and
+    // value strings of RAND_VALUES_LENGTH bytes.
+    RNDKEY1 = rnd.RandomString(RAND_KEYS_LENGTH);
+    RNDKEY2 = rnd.RandomString(RAND_KEYS_LENGTH);
+    RNDKEY3 = rnd.RandomString(RAND_KEYS_LENGTH);
+    p_rv1 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_rv2 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_rv3 = rnd.RandomString(RAND_VALUES_LENGTH);
+
+    ASSERT_OK(Put(RNDKEY1, p_rv1));
+    ASSERT_OK(Put(RNDKEY2, p_rv2));
+    ASSERT_OK(Put(RNDKEY3, p_rv3));
+
+    ASSERT_EQ(Get(KEY1), p_v1);
+    ASSERT_EQ(Get(KEY2), p_v2);
+    ASSERT_EQ(Get(KEY3), p_v3);
+    ASSERT_EQ(Get(KEY4), p_v4);
+    ASSERT_EQ(Get(KEY5), p_v5);
+    ASSERT_EQ(Get(KEY6), p_v6);
+    ASSERT_EQ(Get(KEY7), p_v7);
+    ASSERT_EQ(Get(KEY8), p_v8);
+    ASSERT_EQ(Get(KEY9), p_v9);
+    ASSERT_EQ(Get(RNDKEY1), p_rv1);
+    ASSERT_EQ(Get(RNDKEY2), p_rv2);
+    ASSERT_EQ(Get(RNDKEY3), p_rv3);
+  }
+
+  // Assert that at least one flush to storage has been performed
+  EXPECT_GT(sst_count.exchange(0), EXPECTED_SST_COUNT);
+  // (which will consequently increase the number of mempurges recorded too).
+  EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT);
+
+  // Assert that there is no data corruption, even with
+  // a flush to storage.
+  ASSERT_EQ(Get(KEY1), p_v1);
+  ASSERT_EQ(Get(KEY2), p_v2);
+  ASSERT_EQ(Get(KEY3), p_v3);
+  ASSERT_EQ(Get(KEY4), p_v4);
+  ASSERT_EQ(Get(KEY5), p_v5);
+  ASSERT_EQ(Get(KEY6), p_v6);
+  ASSERT_EQ(Get(KEY7), p_v7);
+  ASSERT_EQ(Get(KEY8), p_v8);
+  ASSERT_EQ(Get(KEY9), p_v9);
+  ASSERT_EQ(Get(RNDKEY1), p_rv1);
+  ASSERT_EQ(Get(RNDKEY2), p_rv2);
+  ASSERT_EQ(Get(RNDKEY3), p_rv3);
+
+  Close();
+}
+
+TEST_F(DBFlushTest, MemPurgeDeleteAndDeleteRange) {
+  Options options = CurrentOptions();
+
+  options.statistics = CreateDBStatistics();
+  options.statistics->set_stats_level(StatsLevel::kAll);
+  options.create_if_missing = true;
+  options.compression = kNoCompression;
+  options.inplace_update_support = false;
+  options.allow_concurrent_memtable_write = true;
+#ifndef ROCKSDB_LITE
+  TestFlushListener* listener = new TestFlushListener(options.env, this);
+  options.listeners.emplace_back(listener);
+#endif // !ROCKSDB_LITE
+  // Enforce size of a single MemTable to 1MB (2^20 bytes).
+  options.write_buffer_size = 1 << 20;
+  // Activate the MemPurge prototype.
+  options.experimental_mempurge_threshold = 1.0;
+
+  ASSERT_OK(TryReopen(options));
+
+  std::atomic<uint32_t> mempurge_count{0};
+  std::atomic<uint32_t> sst_count{0};
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushJob:MemPurgeSuccessful",
+      [&](void* /*arg*/) { mempurge_count++; });
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; });
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+  std::string KEY1 = "ThisIsKey1";
+  std::string KEY2 = "ThisIsKey2";
+  std::string KEY3 = "ThisIsKey3";
+  std::string KEY4 = "ThisIsKey4";
+  std::string KEY5 = "ThisIsKey5";
+  const std::string NOT_FOUND = "NOT_FOUND";
+
+  Random rnd(117);
+  const size_t NUM_REPEAT = 100;
+  const size_t RAND_VALUES_LENGTH = 10240;
+
+  std::string key, value, p_v1, p_v2, p_v3, p_v3b, p_v4, p_v5;
+  int count = 0;
+  const int EXPECTED_COUNT_FORLOOP = 3;
+  const int EXPECTED_COUNT_END = 4;
+
+  ReadOptions ropt;
+  ropt.pin_data = true;
+  ropt.total_order_seek = true;
+  Iterator* iter = nullptr;
+
+  // Insertion of K-V pairs, multiple times.
+  // Also insert DeleteRanges.
+  for (size_t i = 0; i < NUM_REPEAT; i++) {
+    // Create random value strings of RAND_VALUES_LENGTH bytes.
+    p_v1 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v2 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v3 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v3b = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v4 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v5 = rnd.RandomString(RAND_VALUES_LENGTH);
+    ASSERT_OK(Put(KEY1, p_v1));
+    ASSERT_OK(Put(KEY2, p_v2));
+    ASSERT_OK(Put(KEY3, p_v3));
+    ASSERT_OK(Put(KEY4, p_v4));
+    ASSERT_OK(Put(KEY5, p_v5));
+    ASSERT_OK(Delete(KEY2));
+    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY2,
+                               KEY4));
+    ASSERT_OK(Put(KEY3, p_v3b));
+    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), KEY1,
+                               KEY3));
+    ASSERT_OK(Delete(KEY1));
+
+    ASSERT_EQ(Get(KEY1), NOT_FOUND);
+    ASSERT_EQ(Get(KEY2), NOT_FOUND);
+    ASSERT_EQ(Get(KEY3), p_v3b);
+    ASSERT_EQ(Get(KEY4), p_v4);
+    ASSERT_EQ(Get(KEY5), p_v5);
+
+    iter = db_->NewIterator(ropt);
+    iter->SeekToFirst();
+    count = 0;
+    for (; iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      key = (iter->key()).ToString(false);
+      value = (iter->value()).ToString(false);
+      if (key.compare(KEY3) == 0)
+        ASSERT_EQ(value, p_v3b);
+      else if (key.compare(KEY4) == 0)
+        ASSERT_EQ(value, p_v4);
+      else if (key.compare(KEY5) == 0)
+        ASSERT_EQ(value, p_v5);
+      else
+        ASSERT_EQ(value, NOT_FOUND);
+      count++;
+    }
+
+    // Expected count here is 3: KEY3, KEY4, KEY5.
+    ASSERT_EQ(count, EXPECTED_COUNT_FORLOOP);
+    if (iter) {
+      delete iter;
+    }
+  }
+
+  // Check that there was at least one mempurge.
+  const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1;
+  // Check that no SST files were created during flush.
+  const uint32_t EXPECTED_SST_COUNT = 0;
+
+  EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT);
+  EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT);
+
+  // Additional test for the iterator+memPurge.
+  ASSERT_OK(Put(KEY2, p_v2));
+  iter = db_->NewIterator(ropt);
+  iter->SeekToFirst();
+  ASSERT_OK(Put(KEY4, p_v4));
+  count = 0;
+  for (; iter->Valid(); iter->Next()) {
+    ASSERT_OK(iter->status());
+    key = (iter->key()).ToString(false);
+    value = (iter->value()).ToString(false);
+    if (key.compare(KEY2) == 0)
+      ASSERT_EQ(value, p_v2);
+    else if (key.compare(KEY3) == 0)
+      ASSERT_EQ(value, p_v3b);
+    else if (key.compare(KEY4) == 0)
+      ASSERT_EQ(value, p_v4);
+    else if (key.compare(KEY5) == 0)
+      ASSERT_EQ(value, p_v5);
+    else
+      ASSERT_EQ(value, NOT_FOUND);
+    count++;
+  }
+
+  // Expected count here is 4: KEY2, KEY3, KEY4, KEY5.
+  ASSERT_EQ(count, EXPECTED_COUNT_END);
+  if (iter) delete iter;
+
+  Close();
+}
+
+// Create a Compaction Filter that will be invoked
+// at flush time and will update the value of a K-V pair
+// if the key string is "lower" than the filtered_key_ string.
+class ConditionalUpdateFilter : public CompactionFilter {
+ public:
+  explicit ConditionalUpdateFilter(const std::string* filtered_key)
+      : filtered_key_(filtered_key) {}
+  bool Filter(int /*level*/, const Slice& key, const Slice& /*value*/,
+              std::string* new_value, bool* value_changed) const override {
+    // If key < filtered_key_, update the value of the K-V pair.
+    if (key.compare(*filtered_key_) < 0) {
+      assert(new_value != nullptr);
+      new_value->assign(NEW_VALUE);
+      *value_changed = true;
+    }
+    return false /*do not remove this K-V pair*/;
+  }
+
+  const char* Name() const override { return "ConditionalUpdateFilter"; }
+
+ private:
+  const std::string* filtered_key_;
+};
+
+class ConditionalUpdateFilterFactory : public CompactionFilterFactory {
+ public:
+  explicit ConditionalUpdateFilterFactory(const Slice& filtered_key)
+      : filtered_key_(filtered_key.ToString()) {}
+
+  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
+      const CompactionFilter::Context& /*context*/) override {
+    return std::unique_ptr<CompactionFilter>(
+        new ConditionalUpdateFilter(&filtered_key_));
+  }
+
+  const char* Name() const override { return "ConditionalUpdateFilterFactory"; }
+
+  bool ShouldFilterTableFileCreation(
+      TableFileCreationReason reason) const override {
+    // This compaction filter will be invoked
+    // at flush time (and therefore at MemPurge time).
+    return (reason == TableFileCreationReason::kFlush);
+  }
+
+ private:
+  std::string filtered_key_;
+};
+
+TEST_F(DBFlushTest, MemPurgeAndCompactionFilter) {
+  Options options = CurrentOptions();
+
+  std::string KEY1 = "ThisIsKey1";
+  std::string KEY2 = "ThisIsKey2";
+  std::string KEY3 = "ThisIsKey3";
+  std::string KEY4 = "ThisIsKey4";
+  std::string KEY5 = "ThisIsKey5";
+  std::string KEY6 = "ThisIsKey6";
+  std::string KEY7 = "ThisIsKey7";
+  std::string KEY8 = "ThisIsKey8";
+  std::string KEY9 = "ThisIsKey9";
+  const std::string NOT_FOUND = "NOT_FOUND";
+
+  options.statistics = CreateDBStatistics();
+  options.statistics->set_stats_level(StatsLevel::kAll);
+  options.create_if_missing = true;
+  options.compression = kNoCompression;
+  options.inplace_update_support = false;
+  options.allow_concurrent_memtable_write = true;
+#ifndef ROCKSDB_LITE
+  TestFlushListener* listener = new TestFlushListener(options.env, this);
+  options.listeners.emplace_back(listener);
+#endif // !ROCKSDB_LITE
+  // Create a ConditionalUpdate compaction filter
+  // that will update all the values of the K-V pairs
+  // where the keys are "lower" than KEY4.
+  options.compaction_filter_factory =
+      std::make_shared<ConditionalUpdateFilterFactory>(KEY4);
+
+  // Enforce size of a single MemTable to 1MB (2^20 bytes).
+  options.write_buffer_size = 1 << 20;
+  // Activate the MemPurge prototype.
+  options.experimental_mempurge_threshold = 1.0;
+
+  ASSERT_OK(TryReopen(options));
+
+  std::atomic<uint32_t> mempurge_count{0};
+  std::atomic<uint32_t> sst_count{0};
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushJob:MemPurgeSuccessful",
+      [&](void* /*arg*/) { mempurge_count++; });
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; });
+  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+  Random rnd(53);
+  const size_t NUM_REPEAT = 1000;
+  const size_t RAND_VALUES_LENGTH = 10240;
+  std::string p_v1, p_v2, p_v3, p_v4, p_v5, p_v6, p_v7, p_v8, p_v9;
+
+  p_v1 = rnd.RandomString(RAND_VALUES_LENGTH);
+  p_v2 = rnd.RandomString(RAND_VALUES_LENGTH);
+  p_v3 = rnd.RandomString(RAND_VALUES_LENGTH);
+  p_v4 = rnd.RandomString(RAND_VALUES_LENGTH);
+  p_v5 = rnd.RandomString(RAND_VALUES_LENGTH);
+  ASSERT_OK(Put(KEY1, p_v1));
+  ASSERT_OK(Put(KEY2, p_v2));
+  ASSERT_OK(Put(KEY3, p_v3));
+  ASSERT_OK(Put(KEY4, p_v4));
+  ASSERT_OK(Put(KEY5, p_v5));
+  ASSERT_OK(Delete(KEY1));
+
+  // Insertion of K-V pairs, multiple times.
+  for (size_t i = 0; i < NUM_REPEAT; i++) {
+    // Create random value strings of RAND_VALUES_LENGTH bytes.
+    p_v6 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v7 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v8 = rnd.RandomString(RAND_VALUES_LENGTH);
+    p_v9 = rnd.RandomString(RAND_VALUES_LENGTH);
+    ASSERT_OK(Put(KEY6, p_v6));
+    ASSERT_OK(Put(KEY7, p_v7));
+    ASSERT_OK(Put(KEY8, p_v8));
+    ASSERT_OK(Put(KEY9, p_v9));
+
+    ASSERT_OK(Delete(KEY7));
+  }
+
+  // Check that there was at least one mempurge.
+  const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1;
+  // Check that no SST files were created during flush.
+  const uint32_t EXPECTED_SST_COUNT = 0;
+
+  EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT);
+  EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT);
+
+  // Verify that the ConditionalUpdateFilter
+  // updated the values of KEY2 and KEY3, and not KEY4 and KEY5.
+  ASSERT_EQ(Get(KEY1), NOT_FOUND);
+  ASSERT_EQ(Get(KEY2), NEW_VALUE);
+  ASSERT_EQ(Get(KEY3), NEW_VALUE);
+  ASSERT_EQ(Get(KEY4), p_v4);
+  ASSERT_EQ(Get(KEY5), p_v5);
+}
+
+TEST_F(DBFlushTest, DISABLED_MemPurgeWALSupport) {
+  Options options = CurrentOptions();
+
+  options.statistics = CreateDBStatistics();
+  options.statistics->set_stats_level(StatsLevel::kAll);
+  options.create_if_missing = true;
+  options.compression = kNoCompression;
+  options.inplace_update_support = false;
+  options.allow_concurrent_memtable_write = true;
+
+  // Enforce size of a single MemTable to 128KB.
+  options.write_buffer_size = 128 << 10;
+  // Activate the MemPurge prototype.
+  options.experimental_mempurge_threshold = 1.0;
+
+  ASSERT_OK(TryReopen(options));
+
+  const size_t KVSIZE = 10;
+
+  do {
+    CreateAndReopenWithCF({"pikachu"}, options);
+    ASSERT_OK(Put(1, "foo", "v1"));
+    ASSERT_OK(Put(1, "baz", "v5"));
+
+    ReopenWithColumnFamilies({"default", "pikachu"}, options);
+    ASSERT_EQ("v1", Get(1, "foo"));
+
+    ASSERT_EQ("v1", Get(1, "foo"));
+    ASSERT_EQ("v5", Get(1, "baz"));
+    ASSERT_OK(Put(0, "bar", "v2"));
+    ASSERT_OK(Put(1, "bar", "v2"));
+    ASSERT_OK(Put(1, "foo", "v3"));
+    std::atomic<uint32_t> mempurge_count{0};
+    std::atomic<uint32_t> sst_count{0};
+    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+        "DBImpl::FlushJob:MemPurgeSuccessful",
+        [&](void* /*arg*/) { mempurge_count++; });
+    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+        "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; });
+    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+    std::vector<std::string> keys;
+    for (size_t k = 0; k < KVSIZE; k++) {
+      keys.push_back("IamKey" + std::to_string(k));
+    }
+
+    std::string RNDKEY, RNDVALUE;
+    const std::string NOT_FOUND = "NOT_FOUND";
+
+    // Heavy overwrite workload,
+    // more than would fit in maximum allowed memtables.
+    Random rnd(719);
+    const size_t NUM_REPEAT = 100;
+    const size_t RAND_KEY_LENGTH = 4096;
+    const size_t RAND_VALUES_LENGTH = 1024;
+    std::vector<std::string> values_default(KVSIZE), values_pikachu(KVSIZE);
+
+    // Insert a very first set of keys that will be
+    // mempurged at least once.
+    for (size_t k = 0; k < KVSIZE / 2; k++) {
+      values_default[k] = rnd.RandomString(RAND_VALUES_LENGTH);
+      values_pikachu[k] = rnd.RandomString(RAND_VALUES_LENGTH);
+    }
+
+    // Insert keys[0:KVSIZE/2] to
+    // both 'default' and 'pikachu' CFs.
+    for (size_t k = 0; k < KVSIZE / 2; k++) {
+      ASSERT_OK(Put(0, keys[k], values_default[k]));
+      ASSERT_OK(Put(1, keys[k], values_pikachu[k]));
+    }
+
+    // Check that the insertion was seamless.
+    for (size_t k = 0; k < KVSIZE / 2; k++) {
+      ASSERT_EQ(Get(0, keys[k]), values_default[k]);
+      ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]);
+    }
+
+    // Insertion of K-V pairs, multiple times (overwrites)
+    // into 'default' CF. Will trigger mempurge.
+    for (size_t j = 0; j < NUM_REPEAT; j++) {
+      // Create random value strings of RAND_VALUES_LENGTH bytes.
+      for (size_t k = KVSIZE / 2; k < KVSIZE; k++) {
+        values_default[k] = rnd.RandomString(RAND_VALUES_LENGTH);
+      }
+
+      // Insert K-V into default CF.
+      for (size_t k = KVSIZE / 2; k < KVSIZE; k++) {
+        ASSERT_OK(Put(0, keys[k], values_default[k]));
+      }
+
+      // Check key validity, for all keys, both in
+      // default and pikachu CFs.
+      for (size_t k = 0; k < KVSIZE; k++) {
+        ASSERT_EQ(Get(0, keys[k]), values_default[k]);
+      }
+      // Note that at this point, only keys[0:KVSIZE/2]
+      // have been inserted into Pikachu.
+      for (size_t k = 0; k < KVSIZE / 2; k++) {
+        ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]);
+      }
+    }
+
+    // Insertion of K-V pairs, multiple times (overwrites)
+    // into 'pikachu' CF. Will trigger mempurge.
+    // Check that we keep the older logs for 'default' imm().
+    for (size_t j = 0; j < NUM_REPEAT; j++) {
+      // Create random value strings of RAND_VALUES_LENGTH bytes.
+      for (size_t k = KVSIZE / 2; k < KVSIZE; k++) {
+        values_pikachu[k] = rnd.RandomString(RAND_VALUES_LENGTH);
+      }
+
+      // Insert K-V into pikachu CF.
+      for (size_t k = KVSIZE / 2; k < KVSIZE; k++) {
+        ASSERT_OK(Put(1, keys[k], values_pikachu[k]));
+      }
+
+      // Check key validity, for all keys,
+      // both in default and pikachu.
+      for (size_t k = 0; k < KVSIZE; k++) {
+        ASSERT_EQ(Get(0, keys[k]), values_default[k]);
+        ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]);
+      }
+    }
+
+    // Check that there was at least one mempurge.
+    const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1;
+    // Check that no SST files were created during flush.
+    const uint32_t EXPECTED_SST_COUNT = 0;
+
+    EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT);
+    if (options.experimental_mempurge_threshold ==
+        std::numeric_limits<double>::max()) {
+      EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT);
+    }
+
+    ReopenWithColumnFamilies({"default", "pikachu"}, options);
+    // Check that there was no data corruption anywhere,
+    // neither in the 'default' nor in the 'pikachu' CFs.
+    ASSERT_EQ("v3", Get(1, "foo"));
+    ASSERT_OK(Put(1, "foo", "v4"));
+    ASSERT_EQ("v4", Get(1, "foo"));
+    ASSERT_EQ("v2", Get(1, "bar"));
+    ASSERT_EQ("v5", Get(1, "baz"));
+    // Check keys in 'default' and 'pikachu'.
+    // keys[0:KVSIZE/2] were for sure contained
+    // in the imm() at Reopen/recovery time.
+    for (size_t k = 0; k < KVSIZE; k++) {
+      ASSERT_EQ(Get(0, keys[k]), values_default[k]);
+      ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]);
+    }
+    // Insertion of random K-V pairs to trigger
+    // a flush in the pikachu CF.
+    for (size_t j = 0; j < NUM_REPEAT; j++) {
+      RNDKEY = rnd.RandomString(RAND_KEY_LENGTH);
+      RNDVALUE = rnd.RandomString(RAND_VALUES_LENGTH);
+      ASSERT_OK(Put(1, RNDKEY, RNDVALUE));
+    }
+    // Assert that there was at least one flush to storage.
+    EXPECT_GT(sst_count.exchange(0), EXPECTED_SST_COUNT);
+    ReopenWithColumnFamilies({"default", "pikachu"}, options);
+    ASSERT_EQ("v4", Get(1, "foo"));
+    ASSERT_EQ("v2", Get(1, "bar"));
+    ASSERT_EQ("v5", Get(1, "baz"));
+    // Since values in default are held in mutable mem()
+    // and imm(), check if the flush in pikachu didn't
+    // affect these values.
+    for (size_t k = 0; k < KVSIZE; k++) {
+      ASSERT_EQ(Get(0, keys[k]), values_default[k]);
+      ASSERT_EQ(Get(1, keys[k]), values_pikachu[k]);
+    }
+    ASSERT_EQ(Get(1, RNDKEY), RNDVALUE);
+  } while (ChangeWalOptions());
+}
+
 TEST_P(DBFlushDirectIOTest, DirectIO) {
   Options options;
   options.create_if_missing = true;
   options.disable_auto_compactions = true;
   options.max_background_flushes = 2;
   options.use_direct_io_for_flush_and_compaction = GetParam();
-  options.env = new MockEnv(Env::Default());
+  options.env = MockEnv::Create(Env::Default());
   SyncPoint::GetInstance()->SetCallBack(
       "BuildTable:create_file", [&](void* arg) {
         bool* use_direct_writes = static_cast<bool*>(arg);
@@ -305,7 +1443,8 @@
   // mode.
   fault_injection_env->SetFilesystemActive(false);
   ASSERT_OK(db_->ContinueBackgroundWork());
-  dbfull()->TEST_WaitForFlushMemTable();
+  // We ingested the error to env, so the returned status is not OK.
+  ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable());
 #ifndef ROCKSDB_LITE
   uint64_t num_bg_errors;
   ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBackgroundErrors,
@@ -379,9 +1518,9 @@
   DBImpl* db_impl = static_cast_with_check<DBImpl>(db);
   InstrumentedMutex* mutex = db_impl->mutex();
   mutex->Lock();
-  auto* cfd =
-      reinterpret_cast<ColumnFamilyHandleImpl*>(db->DefaultColumnFamily())
-          ->cfd();
+  auto* cfd = static_cast_with_check<ColumnFamilyHandleImpl>(
+                  db->DefaultColumnFamily())
+                  ->cfd();
   ASSERT_LT(seq, cfd->imm()->current()->GetEarliestSequenceNumber());
   mutex->Unlock();
 }
@@ -394,7 +1533,7 @@
   std::shared_ptr listener = std::make_shared();
   SyncPoint::GetInstance()->LoadDependency(
-      {{"DBImpl::BackgroundCallFlush:start",
+      {{"DBImpl::FlushMemTableToOutputFile:AfterPickMemtables",
        "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitFirst"},
       {"DBImpl::FlushMemTableToOutputFile:Finish",
        "DBFlushTest::FireOnFlushCompletedAfterCommittedResult:WaitSecond"}});
@@ -443,6 +1582,568 @@
 }
 #endif // !ROCKSDB_LITE
 
+TEST_F(DBFlushTest, FlushWithBlob) {
+  constexpr uint64_t min_blob_size = 10;
+
+  Options options;
+  options.enable_blob_files = true;
+  options.min_blob_size = min_blob_size;
+  options.disable_auto_compactions = true;
+  options.env = env_;
+
+  Reopen(options);
+
+  constexpr char short_value[] = "short";
+  static_assert(sizeof(short_value) - 1 < min_blob_size,
+                "short_value too long");
+
+  constexpr char long_value[] = "long_value";
+  static_assert(sizeof(long_value) - 1 >= min_blob_size,
+                "long_value too short");
+
+  ASSERT_OK(Put("key1", short_value));
+  ASSERT_OK(Put("key2", long_value));
+
+  ASSERT_OK(Flush());
+
+  ASSERT_EQ(Get("key1"), short_value);
+  ASSERT_EQ(Get("key2"), long_value);
+
+  VersionSet* const versions = dbfull()->GetVersionSet();
+  assert(versions);
+
+  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
+  assert(cfd);
+
+  Version* const current = cfd->current();
+  assert(current);
+
+  const VersionStorageInfo* const storage_info = current->storage_info();
+  assert(storage_info);
+
+  const auto& l0_files = storage_info->LevelFiles(0);
+  ASSERT_EQ(l0_files.size(), 1);
+
+  const FileMetaData* const table_file = l0_files[0];
+  assert(table_file);
+
+  const auto& blob_files = storage_info->GetBlobFiles();
+  ASSERT_EQ(blob_files.size(), 1);
+
+  const auto& blob_file = blob_files.begin()->second;
+  assert(blob_file);
+
+  ASSERT_EQ(table_file->smallest.user_key(), "key1");
+  ASSERT_EQ(table_file->largest.user_key(), "key2");
+  ASSERT_EQ(table_file->fd.smallest_seqno, 1);
+  ASSERT_EQ(table_file->fd.largest_seqno, 2);
+  ASSERT_EQ(table_file->oldest_blob_file_number,
+            blob_file->GetBlobFileNumber());
+
+  ASSERT_EQ(blob_file->GetTotalBlobCount(), 1);
+
+#ifndef ROCKSDB_LITE
+  const InternalStats* const internal_stats = cfd->internal_stats();
+  assert(internal_stats);
+
+  const auto& compaction_stats = internal_stats->TEST_GetCompactionStats();
+  ASSERT_FALSE(compaction_stats.empty());
+  ASSERT_EQ(compaction_stats[0].bytes_written, table_file->fd.GetFileSize());
+  ASSERT_EQ(compaction_stats[0].bytes_written_blob,
+            blob_file->GetTotalBlobBytes());
+  ASSERT_EQ(compaction_stats[0].num_output_files, 1);
+  ASSERT_EQ(compaction_stats[0].num_output_files_blob, 1);
+
+  const uint64_t* const cf_stats_value = internal_stats->TEST_GetCFStatsValue();
+  ASSERT_EQ(cf_stats_value[InternalStats::BYTES_FLUSHED],
+            compaction_stats[0].bytes_written +
+                compaction_stats[0].bytes_written_blob);
+#endif // ROCKSDB_LITE
+}
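FlushWithBlob above exercises the integrated BlobDB path, which is switched on through two Options fields. A minimal sketch of enabling it in application code (the path is illustrative):

    #include "rocksdb/db.h"
    #include "rocksdb/options.h"

    rocksdb::Options opts;
    opts.create_if_missing = true;
    opts.enable_blob_files = true;  // store large values in blob files
    opts.min_blob_size = 10;        // values of >= 10 bytes go to blob files
    rocksdb::DB* db = nullptr;
    rocksdb::Status s = rocksdb::DB::Open(opts, "/tmp/blob_demo", &db);
    // Values shorter than min_blob_size stay inline in the SST files.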
+
+TEST_F(DBFlushTest, FlushWithChecksumHandoff1) {
+  if (mem_env_ || encrypted_env_) {
+    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
+    return;
+  }
+  std::shared_ptr<FaultInjectionTestFS> fault_fs(
+      new FaultInjectionTestFS(FileSystem::Default()));
+  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
+  Options options = CurrentOptions();
+  options.write_buffer_size = 100;
+  options.max_write_buffer_number = 4;
+  options.min_write_buffer_number_to_merge = 3;
+  options.disable_auto_compactions = true;
+  options.env = fault_fs_env.get();
+  options.checksum_handoff_file_types.Add(FileType::kTableFile);
+  Reopen(options);
+
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
+  ASSERT_OK(Put("key1", "value1"));
+  ASSERT_OK(Put("key2", "value2"));
+  ASSERT_OK(dbfull()->TEST_SwitchMemtable());
+
+  // The hash does not match, write fails
+  // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
+  // Since the file system returns IOStatus::Corruption, it is an
+  // unrecoverable error.
+  SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) {
+    fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
+  });
+  ASSERT_OK(Put("key3", "value3"));
+  ASSERT_OK(Put("key4", "value4"));
+  SyncPoint::GetInstance()->EnableProcessing();
+  Status s = Flush();
+  ASSERT_EQ(s.severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError);
+  SyncPoint::GetInstance()->DisableProcessing();
+  Destroy(options);
+  Reopen(options);
+
+  // The file system does not support checksum handoff. The check
+  // will be ignored.
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum);
+  ASSERT_OK(Put("key5", "value5"));
+  ASSERT_OK(Put("key6", "value6"));
+  ASSERT_OK(dbfull()->TEST_SwitchMemtable());
+
+  // Each write will be simulated as corrupted.
+  // Since the file system returns IOStatus::Corruption, it is an
+  // unrecoverable error.
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
+  SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) {
+    fault_fs->IngestDataCorruptionBeforeWrite();
+  });
+  ASSERT_OK(Put("key7", "value7"));
+  ASSERT_OK(Put("key8", "value8"));
+  SyncPoint::GetInstance()->EnableProcessing();
+  s = Flush();
+  ASSERT_EQ(s.severity(),
+            ROCKSDB_NAMESPACE::Status::Severity::kUnrecoverableError);
+  SyncPoint::GetInstance()->DisableProcessing();
+
+  Destroy(options);
+}
+
+TEST_F(DBFlushTest, FlushWithChecksumHandoff2) {
+  if (mem_env_ || encrypted_env_) {
+    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
+    return;
+  }
+  std::shared_ptr<FaultInjectionTestFS> fault_fs(
+      new FaultInjectionTestFS(FileSystem::Default()));
+  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
+  Options options = CurrentOptions();
+  options.write_buffer_size = 100;
+  options.max_write_buffer_number = 4;
+  options.min_write_buffer_number_to_merge = 3;
+  options.disable_auto_compactions = true;
+  options.env = fault_fs_env.get();
+  Reopen(options);
+
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
+  ASSERT_OK(Put("key1", "value1"));
+  ASSERT_OK(Put("key2", "value2"));
+  ASSERT_OK(Flush());
+
+  // options is not set, the checksum handoff will not be triggered
+  SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) {
+    fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
+  });
+  ASSERT_OK(Put("key3", "value3"));
+  ASSERT_OK(Put("key4", "value4"));
+  SyncPoint::GetInstance()->EnableProcessing();
+  ASSERT_OK(Flush());
+  SyncPoint::GetInstance()->DisableProcessing();
+  Destroy(options);
+  Reopen(options);
+
+  // The file system does not support checksum handoff. The check
+  // will be ignored.
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum);
+  ASSERT_OK(Put("key5", "value5"));
+  ASSERT_OK(Put("key6", "value6"));
+  ASSERT_OK(Flush());
+
+  // options is not set, the checksum handoff will not be triggered
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
+  SyncPoint::GetInstance()->SetCallBack("FlushJob::Start", [&](void*) {
+    fault_fs->IngestDataCorruptionBeforeWrite();
+  });
+  ASSERT_OK(Put("key7", "value7"));
+  ASSERT_OK(Put("key8", "value8"));
+  SyncPoint::GetInstance()->EnableProcessing();
+  ASSERT_OK(Flush());
+  SyncPoint::GetInstance()->DisableProcessing();
+
+  Destroy(options);
+}
+
+TEST_F(DBFlushTest, FlushWithChecksumHandoffManifest1) {
+  if (mem_env_ || encrypted_env_) {
+    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
+    return;
+  }
+  std::shared_ptr<FaultInjectionTestFS> fault_fs(
+      new FaultInjectionTestFS(FileSystem::Default()));
+  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
+  Options options = CurrentOptions();
+  options.write_buffer_size = 100;
+  options.max_write_buffer_number = 4;
+  options.min_write_buffer_number_to_merge = 3;
+  options.disable_auto_compactions = true;
+  options.env = fault_fs_env.get();
+  options.checksum_handoff_file_types.Add(FileType::kDescriptorFile);
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
+  Reopen(options);
+
+  ASSERT_OK(Put("key1", "value1"));
+  ASSERT_OK(Put("key2", "value2"));
+  ASSERT_OK(Flush());
+
+  // The hash does not match, write fails
+  // fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
+  // Since the file system returns IOStatus::Corruption, it is mapped to
+  // kFatalError error.
+  ASSERT_OK(Put("key3", "value3"));
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionSet::LogAndApply:WriteManifest", [&](void*) {
+        fault_fs->SetChecksumHandoffFuncType(ChecksumType::kxxHash);
+      });
+  ASSERT_OK(Put("key3", "value3"));
+  ASSERT_OK(Put("key4", "value4"));
+  SyncPoint::GetInstance()->EnableProcessing();
+  Status s = Flush();
+  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError);
+  SyncPoint::GetInstance()->DisableProcessing();
+  Destroy(options);
+}
+
+TEST_F(DBFlushTest, FlushWithChecksumHandoffManifest2) {
+  if (mem_env_ || encrypted_env_) {
+    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
+    return;
+  }
+  std::shared_ptr<FaultInjectionTestFS> fault_fs(
+      new FaultInjectionTestFS(FileSystem::Default()));
+  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
+  Options options = CurrentOptions();
+  options.write_buffer_size = 100;
+  options.max_write_buffer_number = 4;
+  options.min_write_buffer_number_to_merge = 3;
+  options.disable_auto_compactions = true;
+  options.env = fault_fs_env.get();
+  options.checksum_handoff_file_types.Add(FileType::kDescriptorFile);
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kNoChecksum);
+  Reopen(options);
+  // The file system does not support checksum handoff. The check
+  // will be ignored.
+  ASSERT_OK(Put("key5", "value5"));
+  ASSERT_OK(Put("key6", "value6"));
+  ASSERT_OK(Flush());
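The four checksum-handoff tests all hinge on the same opt-in: checksum_handoff_file_types selects which file kinds get a write-time checksum handed to the file system. Reduced to application code, the configuration is just (a sketch; these two file types are the ones the tests cover):

    #include "rocksdb/options.h"
    #include "rocksdb/types.h"

    rocksdb::Options opts;
    // Hand a checksum to the file system on every table-file write...
    opts.checksum_handoff_file_types.Add(rocksdb::FileType::kTableFile);
    // ...and on MANIFEST (descriptor file) writes as well.
    opts.checksum_handoff_file_types.Add(rocksdb::FileType::kDescriptorFile);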
+
+  // Each write will be simulated as corrupted.
+  // Since the file system returns IOStatus::Corruption, it is mapped to
+  // kFatalError error.
+  fault_fs->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionSet::LogAndApply:WriteManifest",
+      [&](void*) { fault_fs->IngestDataCorruptionBeforeWrite(); });
+  ASSERT_OK(Put("key7", "value7"));
+  ASSERT_OK(Put("key8", "value8"));
+  SyncPoint::GetInstance()->EnableProcessing();
+  Status s = Flush();
+  ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kFatalError);
+  SyncPoint::GetInstance()->DisableProcessing();
+
+  Destroy(options);
+}
+
+TEST_F(DBFlushTest, PickRightMemtables) {
+  Options options = CurrentOptions();
+  DestroyAndReopen(options);
+  options.create_if_missing = true;
+
+  const std::string test_cf_name = "test_cf";
+  options.max_write_buffer_number = 128;
+  CreateColumnFamilies({test_cf_name}, options);
+
+  Close();
+
+  ReopenWithColumnFamilies({kDefaultColumnFamilyName, test_cf_name}, options);
+
+  ASSERT_OK(db_->Put(WriteOptions(), "key", "value"));
+
+  ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "key", "value"));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+  SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::SyncClosedLogs:BeforeReLock", [&](void* /*arg*/) {
+        ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "what", "v"));
+        auto* cfhi =
+            static_cast_with_check<ColumnFamilyHandleImpl>(handles_[1]);
+        assert(cfhi);
+        ASSERT_OK(dbfull()->TEST_SwitchMemtable(cfhi->cfd()));
+      });
+  SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", [&](void* arg) {
+        auto* job = reinterpret_cast<FlushJob*>(arg);
+        assert(job);
+        const auto& mems = job->GetMemTables();
+        assert(mems.size() == 1);
+        assert(mems[0]);
+        ASSERT_EQ(1, mems[0]->GetID());
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_OK(db_->Flush(FlushOptions(), handles_[1]));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
+class DBFlushTestBlobError : public DBFlushTest,
+                             public testing::WithParamInterface<std::string> {
+ public:
+  DBFlushTestBlobError() : sync_point_(GetParam()) {}
+
+  std::string sync_point_;
+};
+
+INSTANTIATE_TEST_CASE_P(DBFlushTestBlobError, DBFlushTestBlobError,
+                        ::testing::ValuesIn(std::vector<std::string>{
+                            "BlobFileBuilder::WriteBlobToFile:AddRecord",
+                            "BlobFileBuilder::WriteBlobToFile:AppendFooter"}));
+
+TEST_P(DBFlushTestBlobError, FlushError) {
+  Options options;
+  options.enable_blob_files = true;
+  options.disable_auto_compactions = true;
+  options.env = env_;
+
+  Reopen(options);
+
+  ASSERT_OK(Put("key", "blob"));
+
+  SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) {
+    Status* const s = static_cast<Status*>(arg);
+    assert(s);
+
+    (*s) = Status::IOError(sync_point_);
+  });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_NOK(Flush());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  VersionSet* const versions = dbfull()->GetVersionSet();
+  assert(versions);
+
+  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
+  assert(cfd);
+
+  Version* const current = cfd->current();
+  assert(current);
+
+  const VersionStorageInfo* const storage_info = current->storage_info();
+  assert(storage_info);
+
+  const auto& l0_files = storage_info->LevelFiles(0);
+  ASSERT_TRUE(l0_files.empty());
+
+  const auto& blob_files = storage_info->GetBlobFiles();
+  ASSERT_TRUE(blob_files.empty());
+
+  // Make sure the files generated by the failed job have been deleted
+  std::vector<std::string> files;
+  ASSERT_OK(env_->GetChildren(dbname_, &files));
+  for (const auto& file : files) {
+    uint64_t number = 0;
+    FileType type = kTableFile;
+
+    if (!ParseFileName(file, &number, &type)) {
+      continue;
+    }
+
+    ASSERT_NE(type, kTableFile);
+    ASSERT_NE(type, kBlobFile);
+  }
+
+#ifndef ROCKSDB_LITE
+  const InternalStats* const internal_stats = cfd->internal_stats();
+  assert(internal_stats);
+
+  const auto& compaction_stats = internal_stats->TEST_GetCompactionStats();
+  ASSERT_FALSE(compaction_stats.empty());
+
+  if (sync_point_ == "BlobFileBuilder::WriteBlobToFile:AddRecord") {
+    ASSERT_EQ(compaction_stats[0].bytes_written, 0);
+    ASSERT_EQ(compaction_stats[0].bytes_written_blob, 0);
+    ASSERT_EQ(compaction_stats[0].num_output_files, 0);
+    ASSERT_EQ(compaction_stats[0].num_output_files_blob, 0);
+  } else {
+    // SST file writing succeeded; blob file writing failed (during Finish)
+    ASSERT_GT(compaction_stats[0].bytes_written, 0);
+    ASSERT_EQ(compaction_stats[0].bytes_written_blob, 0);
+    ASSERT_EQ(compaction_stats[0].num_output_files, 1);
+    ASSERT_EQ(compaction_stats[0].num_output_files_blob, 0);
+  }
+
+  const uint64_t* const cf_stats_value = internal_stats->TEST_GetCFStatsValue();
+  ASSERT_EQ(cf_stats_value[InternalStats::BYTES_FLUSHED],
+            compaction_stats[0].bytes_written +
+                compaction_stats[0].bytes_written_blob);
+#endif // ROCKSDB_LITE
+}
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBFlushTest, TombstoneVisibleInSnapshot) {
+  class SimpleTestFlushListener : public EventListener {
+   public:
+    explicit SimpleTestFlushListener(DBFlushTest* _test) : test_(_test) {}
+    ~SimpleTestFlushListener() override {}
+
+    void OnFlushBegin(DB* db, const FlushJobInfo& info) override {
+      ASSERT_EQ(static_cast<uint32_t>(0), info.cf_id);
+
+      ASSERT_OK(db->Delete(WriteOptions(), "foo"));
+      snapshot_ = db->GetSnapshot();
+      ASSERT_OK(db->Put(WriteOptions(), "foo", "value"));
+
+      auto* dbimpl = static_cast_with_check<DBImpl>(db);
+      assert(dbimpl);
+
+      ColumnFamilyHandle* cfh = db->DefaultColumnFamily();
+      auto* cfhi = static_cast_with_check<ColumnFamilyHandleImpl>(cfh);
+      assert(cfhi);
+      ASSERT_OK(dbimpl->TEST_SwitchMemtable(cfhi->cfd()));
+    }
+
+    DBFlushTest* test_ = nullptr;
+    const Snapshot* snapshot_ = nullptr;
+  };
+
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  auto* listener = new SimpleTestFlushListener(this);
+  options.listeners.emplace_back(listener);
+  DestroyAndReopen(options);
+
+  ASSERT_OK(db_->Put(WriteOptions(), "foo", "value0"));
+
+  ManagedSnapshot snapshot_guard(db_);
+
+  ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily();
+  ASSERT_OK(db_->Flush(FlushOptions(), default_cf));
+
+  const Snapshot* snapshot = listener->snapshot_;
+  assert(snapshot);
+
+  ReadOptions read_opts;
+  read_opts.snapshot = snapshot;
+
+  // Using snapshot should not see "foo".
+  {
+    std::string value;
+    Status s = db_->Get(read_opts, "foo", &value);
+    ASSERT_TRUE(s.IsNotFound());
+  }
+
+  db_->ReleaseSnapshot(snapshot);
+}
+
+TEST_P(DBAtomicFlushTest, ManualFlushUnder2PC) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.allow_2pc = true;
+  options.atomic_flush = GetParam();
+  // 64MB so that memtable flush won't be triggered by the small writes.
+  options.write_buffer_size = (static_cast<size_t>(64) << 20);
+
+  // Destroy the DB to recreate as a TransactionDB.
+  Close();
+  Destroy(options, true);
+
+  // Create a TransactionDB.
+  TransactionDB* txn_db = nullptr;
+  TransactionDBOptions txn_db_opts;
+  txn_db_opts.write_policy = TxnDBWritePolicy::WRITE_COMMITTED;
+  ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db));
+  ASSERT_NE(txn_db, nullptr);
+  db_ = txn_db;
+
+  // Create two more column families other than the default CF.
+  std::vector<std::string> cfs = {"puppy", "kitty"};
+  CreateColumnFamilies(cfs, options);
+  ASSERT_EQ(handles_.size(), 2);
+  ASSERT_EQ(handles_[0]->GetName(), cfs[0]);
+  ASSERT_EQ(handles_[1]->GetName(), cfs[1]);
+  const size_t kNumCfToFlush = options.atomic_flush ? 2 : 1;
+
+  WriteOptions wopts;
+  TransactionOptions txn_opts;
+  // txn1 only prepares, but does not commit.
+  // The WAL containing the prepared but uncommitted data must be kept.
+  Transaction* txn1 = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
+  // txn2 not only prepares, but also commits.
+  Transaction* txn2 = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
+  ASSERT_NE(txn1, nullptr);
+  ASSERT_NE(txn2, nullptr);
+  for (size_t i = 0; i < kNumCfToFlush; i++) {
+    ASSERT_OK(txn1->Put(handles_[i], "k1", "v1"));
+    ASSERT_OK(txn2->Put(handles_[i], "k2", "v2"));
+  }
+  // A txn must be named before Prepare().
+  ASSERT_OK(txn1->SetName("txn1"));
+  ASSERT_OK(txn2->SetName("txn2"));
+  // Prepare writes to the WAL, but not to the memtable. (WriteCommitted)
+  ASSERT_OK(txn1->Prepare());
+  ASSERT_OK(txn2->Prepare());
+  // Commit writes to the memtable.
+  ASSERT_OK(txn2->Commit());
+  delete txn1;
+  delete txn2;
+
+  // There is still unflushed data in the memtable.
+  // But since the data is small enough to reside in the active memtable,
+  // there is no immutable memtable.
+  for (size_t i = 0; i < kNumCfToFlush; i++) {
+    auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
+    ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
+    ASSERT_FALSE(cfh->cfd()->mem()->IsEmpty());
+  }
+
+  // Atomically flush the memtables;
+  // the min log with prepared data should be written to the MANIFEST.
+  std::vector<ColumnFamilyHandle*> cfs_to_flush(kNumCfToFlush);
+  for (size_t i = 0; i < kNumCfToFlush; i++) {
+    cfs_to_flush[i] = handles_[i];
+  }
+  ASSERT_OK(txn_db->Flush(FlushOptions(), cfs_to_flush));
+
+  // There is no remaining data in the memtable after the flush.
+  for (size_t i = 0; i < kNumCfToFlush; i++) {
+    auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
+    ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
+    ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty());
+    ASSERT_EQ(cfh->cfd()->GetFlushReason(), FlushReason::kManualFlush);
+  }
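The WAL-retention argument in this test rests on the WRITE_COMMITTED two-phase protocol: Prepare() reaches only the WAL, while Commit() applies the write to the memtable. Stripped of the assertions, the client-side flow is roughly (path and error handling elided):

    #include "rocksdb/utilities/transaction_db.h"

    using namespace ROCKSDB_NAMESPACE;

    TransactionDB* txn_db = nullptr;
    TransactionDBOptions txn_db_opts;
    txn_db_opts.write_policy = TxnDBWritePolicy::WRITE_COMMITTED;
    Options opts;
    opts.create_if_missing = true;
    opts.allow_2pc = true;
    TransactionDB::Open(opts, txn_db_opts, "/tmp/twopc_demo", &txn_db);

    Transaction* txn = txn_db->BeginTransaction(WriteOptions());
    txn->Put("k1", "v1");
    txn->SetName("txn1");  // a transaction must be named before Prepare()
    txn->Prepare();        // persisted to the WAL, not yet in the memtable
    txn->Commit();         // now applied to the memtable
    delete txn;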
+
+  // The recovered min log number with prepared data should be non-zero.
+  // In 2pc mode, MinLogNumberToKeep returns the
+  // VersionSet::min_log_number_to_keep_2pc recovered from MANIFEST, if it's 0,
+  // it means atomic flush didn't write the min_log_number_to_keep to MANIFEST.
+  cfs.push_back(kDefaultColumnFamilyName);
+  ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
+  DBImpl* db_impl = reinterpret_cast<DBImpl*>(db_);
+  ASSERT_TRUE(db_impl->allow_2pc());
+  ASSERT_NE(db_impl->MinLogNumberToKeep(), 0);
+}
+#endif // ROCKSDB_LITE
+
 TEST_P(DBAtomicFlushTest, ManualAtomicFlush) {
   Options options = CurrentOptions();
   options.create_if_missing = true;
@@ -457,18 +2158,84 @@
   for (size_t i = 0; i != num_cfs; ++i) {
     ASSERT_OK(Put(static_cast<int>(i) /*cf*/, "key", "value", wopts));
   }
+
+  for (size_t i = 0; i != num_cfs; ++i) {
+    auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
+    ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
+    ASSERT_FALSE(cfh->cfd()->mem()->IsEmpty());
+  }
+
   std::vector<int> cf_ids;
   for (size_t i = 0; i != num_cfs; ++i) {
     cf_ids.emplace_back(static_cast<int>(i));
   }
   ASSERT_OK(Flush(cf_ids));
+
   for (size_t i = 0; i != num_cfs; ++i) {
     auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
+    ASSERT_EQ(cfh->cfd()->GetFlushReason(), FlushReason::kManualFlush);
     ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
     ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty());
   }
 }
 
+TEST_P(DBAtomicFlushTest, PrecomputeMinLogNumberToKeepNon2PC) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.atomic_flush = GetParam();
+  options.write_buffer_size = (static_cast<size_t>(64) << 20);
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  const size_t num_cfs = handles_.size();
+  ASSERT_EQ(num_cfs, 2);
+  WriteOptions wopts;
+  for (size_t i = 0; i != num_cfs; ++i) {
+    ASSERT_OK(Put(static_cast<int>(i) /*cf*/, "key", "value", wopts));
+  }
+
+  {
+    // Flush the default CF only.
+    std::vector<int> cf_ids{0};
+    ASSERT_OK(Flush(cf_ids));
+
+    autovector<ColumnFamilyData*> flushed_cfds;
+    autovector<autovector<VersionEdit*>> flush_edits;
+    auto flushed_cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[0]);
+    flushed_cfds.push_back(flushed_cfh->cfd());
+    flush_edits.push_back({});
+    auto unflushed_cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[1]);
+
+    ASSERT_EQ(PrecomputeMinLogNumberToKeepNon2PC(dbfull()->GetVersionSet(),
+                                                 flushed_cfds, flush_edits),
+              unflushed_cfh->cfd()->GetLogNumber());
+  }
+
+  {
+    // Flush all CFs.
+    std::vector<int> cf_ids;
+    for (size_t i = 0; i != num_cfs; ++i) {
+      cf_ids.emplace_back(static_cast<int>(i));
+    }
+    ASSERT_OK(Flush(cf_ids));
+    uint64_t log_num_after_flush = dbfull()->TEST_GetCurrentLogNumber();
+
+    uint64_t min_log_number_to_keep = port::kMaxUint64;
+    autovector<ColumnFamilyData*> flushed_cfds;
+    autovector<autovector<VersionEdit*>> flush_edits;
+    for (size_t i = 0; i != num_cfs; ++i) {
+      auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
+      flushed_cfds.push_back(cfh->cfd());
+      flush_edits.push_back({});
+      min_log_number_to_keep =
+          std::min(min_log_number_to_keep, cfh->cfd()->GetLogNumber());
+    }
+    ASSERT_EQ(min_log_number_to_keep, log_num_after_flush);
+    ASSERT_EQ(PrecomputeMinLogNumberToKeepNon2PC(dbfull()->GetVersionSet(),
+                                                 flushed_cfds, flush_edits),
+              min_log_number_to_keep);
+  }
+}
+
 TEST_P(DBAtomicFlushTest, AtomicFlushTriggeredByMemTableFull) {
   Options options = CurrentOptions();
   options.create_if_missing = true;
@@ -499,13 +2266,13 @@
   TEST_SYNC_POINT(
       "DBAtomicFlushTest::AtomicFlushTriggeredByMemTableFull:BeforeCheck");
   if (options.atomic_flush) {
-    for (size_t i = 0; i != num_cfs - 1; ++i) {
+    for (size_t i = 0; i + 1 != num_cfs; ++i) {
       auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
       ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
       ASSERT_TRUE(cfh->cfd()->mem()->IsEmpty());
     }
   } else {
-    for (size_t i = 0; i != num_cfs - 1; ++i) {
+    for (size_t i = 0; i + 1 != num_cfs; ++i) {
       auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
       ASSERT_EQ(0, cfh->cfd()->imm()->NumNotFlushed());
       ASSERT_FALSE(cfh->cfd()->mem()->IsEmpty());
@@ -549,7 +2316,8 @@
   fault_injection_env->SetFilesystemActive(false);
   TEST_SYNC_POINT("DBAtomicFlushTest::AtomicFlushRollbackSomeJobs:2");
   for (auto* cfh : handles_) {
-    dbfull()->TEST_WaitForFlushMemTable(cfh);
+    // Returns the IO error happened during flush.
+    ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable(cfh));
   }
   for (size_t i = 0; i != num_cfs; ++i) {
     auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
@@ -651,7 +2419,7 @@
   options.create_if_missing = true;
   options.atomic_flush = atomic_flush;
   options.memtable_factory.reset(
-      new SpecialSkipListFactory(kNumKeysTriggerFlush));
+      test::NewSpecialSkipListFactory(kNumKeysTriggerFlush));
   CreateAndReopenWithCF({"pikachu"}, options);
 
   for (int i = 0; i != kNumKeysTriggerFlush; ++i) {
@@ -770,6 +2538,122 @@
   SyncPoint::GetInstance()->ClearAllCallBacks();
 }
 
+// In atomic flush, concurrent bg flush threads commit to the MANIFEST in
+// serial, in the order of their picked memtables for each column family.
+// Only when a bg flush thread finds out that its memtables are the earliest
+// unflushed ones for all the included column families will this bg flush
+// thread continue to commit to MANIFEST.
+// This unit test uses sync points to coordinate the execution of two bg
+// threads executing the same sequence of functions. The interleaving is as
+// follows.
+// time  bg1                                bg2
+//  |    pick memtables to flush
+//  |    flush memtables cf1_m1, cf2_m1
+//  |    join MANIFEST write queue
+//  |                                       pick memtables to flush
+//  |                                       flush memtables cf1_(m1+1)
+//  |                                       join MANIFEST write queue
+//  |                                       wait to write MANIFEST
+//  |    write MANIFEST
+//  |    IO error
+//  |                                       detect IO error and stop waiting
+//  V
+TEST_P(DBAtomicFlushTest, BgThreadNoWaitAfterManifestError) {
+  bool atomic_flush = GetParam();
+  if (!atomic_flush) {
+    return;
+  }
+  auto fault_injection_env = std::make_shared<FaultInjectionTestEnv>(env_);
+  Options options = GetDefaultOptions();
+  options.create_if_missing = true;
+  options.atomic_flush = true;
+  options.env = fault_injection_env.get();
+  // Set a larger value than default so that RocksDB can schedule concurrent
+  // background flush threads.
+  options.max_background_jobs = 8;
+  options.max_write_buffer_number = 8;
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  assert(2 == handles_.size());
+
+  WriteOptions write_opts;
+  write_opts.disableWAL = true;
+
+  ASSERT_OK(Put(0, "a", "v_0_a", write_opts));
+  ASSERT_OK(Put(1, "a", "v_1_a", write_opts));
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  SyncPoint::GetInstance()->LoadDependency({
+      {"BgFlushThr2:WaitToCommit", "BgFlushThr1:BeforeWriteManifest"},
+  });
+
+  std::thread::id bg_flush_thr1, bg_flush_thr2;
+  SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::BackgroundCallFlush:start", [&](void*) {
+        if (bg_flush_thr1 == std::thread::id()) {
+          bg_flush_thr1 = std::this_thread::get_id();
+        } else if (bg_flush_thr2 == std::thread::id()) {
+          bg_flush_thr2 = std::this_thread::get_id();
+        }
+      });
+
+  int called = 0;
+  SyncPoint::GetInstance()->SetCallBack(
+      "DBImpl::AtomicFlushMemTablesToOutputFiles:WaitToCommit", [&](void* arg) {
+        if (std::this_thread::get_id() == bg_flush_thr2) {
+          const auto* ptr = reinterpret_cast<std::pair<Status, bool>*>(arg);
+          assert(ptr);
+          if (0 == called) {
+            // When bg flush thread 2 reaches here for the first time.
+            ASSERT_OK(ptr->first);
+            ASSERT_TRUE(ptr->second);
+          } else if (1 == called) {
+            // When bg flush thread 2 reaches here for the second time.
+            ASSERT_TRUE(ptr->first.IsIOError());
+            ASSERT_FALSE(ptr->second);
+          }
+          ++called;
+          TEST_SYNC_POINT("BgFlushThr2:WaitToCommit");
+        }
+      });
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionSet::ProcessManifestWrites:BeforeWriteLastVersionEdit:0",
+      [&](void*) {
+        if (std::this_thread::get_id() == bg_flush_thr1) {
+          TEST_SYNC_POINT("BgFlushThr1:BeforeWriteManifest");
+        }
+      });
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionSet::LogAndApply:WriteManifest", [&](void*) {
+        if (std::this_thread::get_id() != bg_flush_thr1) {
+          return;
+        }
+        ASSERT_OK(db_->Put(write_opts, "b", "v_1_b"));
+
+        FlushOptions flush_opts;
+        flush_opts.wait = false;
+        std::vector<ColumnFamilyHandle*> cfhs(1, db_->DefaultColumnFamily());
+        ASSERT_OK(dbfull()->Flush(flush_opts, cfhs));
+      });
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionSet::ProcessManifestWrites:AfterSyncManifest", [&](void* arg) {
+        auto* ptr = reinterpret_cast<IOStatus*>(arg);
+        assert(ptr);
+        *ptr = IOStatus::IOError("Injected failure");
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  ASSERT_TRUE(dbfull()->Flush(FlushOptions(), handles_).IsIOError());
+
+  Close();
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+}
+
 INSTANTIATE_TEST_CASE_P(DBFlushDirectIOTest, DBFlushDirectIOTest,
                         testing::Bool());
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc	1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc	2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,173 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifndef ROCKSDB_LITE
+#include "db/db_impl/compacted_db_impl.h"
+
+#include "db/db_impl/db_impl.h"
+#include "db/version_set.h"
+#include "logging/logging.h"
+#include "table/get_context.h"
+#include "util/cast_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+extern void MarkKeyMayExist(void* arg);
+extern bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
+                      const Slice& v, bool hit_and_return);
+
+CompactedDBImpl::CompactedDBImpl(const DBOptions& options,
+                                 const std::string& dbname)
+    : DBImpl(options, dbname, /*seq_per_batch*/ false,
+             /*batch_per_txn*/ true, /*read_only*/ true),
+      cfd_(nullptr),
+      version_(nullptr),
+      user_comparator_(nullptr) {}
+
+CompactedDBImpl::~CompactedDBImpl() {
+}
+
+size_t CompactedDBImpl::FindFile(const Slice& key) {
+  size_t right = files_.num_files - 1;
+  auto cmp = [&](const FdWithKeyRange& f, const Slice& k) -> bool {
+    return user_comparator_->Compare(ExtractUserKey(f.largest_key), k) < 0;
+  };
+  return static_cast<size_t>(std::lower_bound(files_.files,
+      files_.files + right, key, cmp) - files_.files);
+}
+
+Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*,
+                            const Slice& key, PinnableSlice* value) {
+  GetContext get_context(user_comparator_, nullptr, nullptr, nullptr,
+                         GetContext::kNotFound, key, value, nullptr, nullptr,
+                         nullptr, true, nullptr, nullptr);
+  LookupKey lkey(key, kMaxSequenceNumber);
+  Status s = files_.files[FindFile(key)].fd.table_reader->Get(
+      options, lkey.internal_key(), &get_context, nullptr);
+  if (!s.ok() && !s.IsNotFound()) {
+    return s;
+  }
+  if (get_context.State() == GetContext::kFound) {
+    return Status::OK();
+  }
+  return Status::NotFound();
+}
+
+std::vector<Status> CompactedDBImpl::MultiGet(
+    const ReadOptions& options, const std::vector<ColumnFamilyHandle*>&,
+    const std::vector<Slice>& keys, std::vector<std::string>* values) {
+  autovector<TableReader*> reader_list;
+  for (const auto& key : keys) {
+    const FdWithKeyRange& f = files_.files[FindFile(key)];
+    if (user_comparator_->Compare(key, ExtractUserKey(f.smallest_key)) < 0) {
+      reader_list.push_back(nullptr);
+    } else {
+      LookupKey lkey(key, kMaxSequenceNumber);
+      f.fd.table_reader->Prepare(lkey.internal_key());
+      reader_list.push_back(f.fd.table_reader);
+    }
+  }
+  std::vector<Status> statuses(keys.size(), Status::NotFound());
+  values->resize(keys.size());
+  int idx = 0;
+  for (auto* r : reader_list) {
+    if (r != nullptr) {
+      PinnableSlice pinnable_val;
+      std::string& value = (*values)[idx];
+      GetContext get_context(user_comparator_, nullptr, nullptr, nullptr,
+                             GetContext::kNotFound, keys[idx], &pinnable_val,
+                             nullptr, nullptr, nullptr, true, nullptr, nullptr);
+      LookupKey lkey(keys[idx], kMaxSequenceNumber);
+      Status s = r->Get(options, lkey.internal_key(), &get_context, nullptr);
+      assert(static_cast<size_t>(idx) < statuses.size());
+      if (!s.ok() && !s.IsNotFound()) {
+        statuses[idx] = s;
+      } else {
+        value.assign(pinnable_val.data(), pinnable_val.size());
+        if (get_context.State() == GetContext::kFound) {
+          statuses[idx] = Status::OK();
+        }
+      }
+    }
+    ++idx;
+  }
+  return statuses;
+}
+
+Status CompactedDBImpl::Init(const Options& options) {
+  SuperVersionContext sv_context(/* create_superversion */ true);
+  mutex_.Lock();
+  ColumnFamilyDescriptor cf(kDefaultColumnFamilyName,
+                            ColumnFamilyOptions(options));
+  Status s = Recover({cf}, true /* read only */, false, true);
+  if (s.ok()) {
+    cfd_ = static_cast_with_check<ColumnFamilyHandleImpl>(DefaultColumnFamily())
+               ->cfd();
+    cfd_->InstallSuperVersion(&sv_context, &mutex_);
+  }
+  mutex_.Unlock();
+  sv_context.Clean();
+  if (!s.ok()) {
+    return s;
+  }
+  NewThreadStatusCfInfo(cfd_);
+
+Status CompactedDBImpl::Init(const Options& options) {
+  SuperVersionContext sv_context(/* create_superversion */ true);
+  mutex_.Lock();
+  ColumnFamilyDescriptor cf(kDefaultColumnFamilyName,
+                            ColumnFamilyOptions(options));
+  Status s = Recover({cf}, true /* read only */, false, true);
+  if (s.ok()) {
+    cfd_ = static_cast_with_check<ColumnFamilyHandleImpl>(DefaultColumnFamily())
+               ->cfd();
+    cfd_->InstallSuperVersion(&sv_context, &mutex_);
+  }
+  mutex_.Unlock();
+  sv_context.Clean();
+  if (!s.ok()) {
+    return s;
+  }
+  NewThreadStatusCfInfo(cfd_);
+  version_ = cfd_->GetSuperVersion()->current;
+  user_comparator_ = cfd_->user_comparator();
+  auto* vstorage = version_->storage_info();
+  if (vstorage->num_non_empty_levels() == 0) {
+    return Status::NotSupported("no file exists");
+  }
+  const LevelFilesBrief& l0 = vstorage->LevelFilesBrief(0);
+  // L0 should not have files
+  if (l0.num_files > 1) {
+    return Status::NotSupported("L0 contain more than 1 file");
+  }
+  if (l0.num_files == 1) {
+    if (vstorage->num_non_empty_levels() > 1) {
+      return Status::NotSupported("Both L0 and other level contain files");
+    }
+    files_ = l0;
+    return Status::OK();
+  }
+
+  for (int i = 1; i < vstorage->num_non_empty_levels() - 1; ++i) {
+    if (vstorage->LevelFilesBrief(i).num_files > 0) {
+      return Status::NotSupported("Other levels also contain files");
+    }
+  }
+
+  int level = vstorage->num_non_empty_levels() - 1;
+  if (vstorage->LevelFilesBrief(level).num_files > 0) {
+    files_ = vstorage->LevelFilesBrief(level);
+    return Status::OK();
+  }
+  return Status::NotSupported("no file exists");
+}
+
+Status CompactedDBImpl::Open(const Options& options,
+                             const std::string& dbname, DB** dbptr) {
+  *dbptr = nullptr;
+
+  if (options.max_open_files != -1) {
+    return Status::InvalidArgument("require max_open_files = -1");
+  }
+  if (options.merge_operator.get() != nullptr) {
+    return Status::InvalidArgument("merge operator is not supported");
+  }
+  DBOptions db_options(options);
+  std::unique_ptr<CompactedDBImpl> db(new CompactedDBImpl(db_options, dbname));
+  Status s = db->Init(options);
+  if (s.ok()) {
+    db->StartPeriodicWorkScheduler();
+    ROCKS_LOG_INFO(db->immutable_db_options_.info_log,
+                   "Opened the db as fully compacted mode");
+    LogFlush(db->immutable_db_options_.info_log);
+    *dbptr = db.release();
+  }
+  return s;
+}
+
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // ROCKSDB_LITE
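For context: this implementation is only reachable when the database is opened read-only and the checks in Init()/Open() above hold (a single sorted run, max_open_files == -1, no merge operator). A hedged usage sketch, assuming the public DB::OpenForReadOnly entry point tries CompactedDBImpl::Open first as in upstream RocksDB, with a hypothetical path and key:

  #include <iostream>
  #include <string>
  #include "rocksdb/db.h"

  int main() {
    rocksdb::Options options;
    options.max_open_files = -1;  // required by CompactedDBImpl::Open

    rocksdb::DB* db = nullptr;
    // Upstream tries the fully compacted fast path first; if the DB has
    // files in more than one level it falls back to the plain read-only DB.
    rocksdb::Status s =
        rocksdb::DB::OpenForReadOnly(options, "/tmp/compacted_db", &db);
    if (!s.ok()) {
      std::cerr << s.ToString() << std::endl;
      return 1;
    }
    std::string value;
    s = db->Get(rocksdb::ReadOptions(), "some_key", &value);
    std::cout << (s.ok() ? value : s.ToString()) << std::endl;
    delete db;
    return 0;
  }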
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h	1970-01-01 00:00:00.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h	2025-05-19 16:14:27.000000000 +0000
@@ -0,0 +1,118 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+#ifndef ROCKSDB_LITE
+#include <string>
+#include <vector>
+#include "db/db_impl/db_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class CompactedDBImpl : public DBImpl {
+ public:
+  CompactedDBImpl(const DBOptions& options, const std::string& dbname);
+  // No copying allowed
+  CompactedDBImpl(const CompactedDBImpl&) = delete;
+  void operator=(const CompactedDBImpl&) = delete;
+
+  ~CompactedDBImpl() override;
+
+  static Status Open(const Options& options, const std::string& dbname,
+                     DB** dbptr);
+
+  // Implementations of the DB interface
+  using DB::Get;
+  virtual Status Get(const ReadOptions& options,
+                     ColumnFamilyHandle* column_family, const Slice& key,
+                     PinnableSlice* value) override;
+  using DB::MultiGet;
+  virtual std::vector<Status> MultiGet(
+      const ReadOptions& options,
+      const std::vector<ColumnFamilyHandle*>&,
+      const std::vector<Slice>& keys, std::vector<std::string>* values)
+      override;
+
+  using DBImpl::Put;
+  virtual Status Put(const WriteOptions& /*options*/,
+                     ColumnFamilyHandle* /*column_family*/,
+                     const Slice& /*key*/, const Slice& /*value*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  using DBImpl::Merge;
+  virtual Status Merge(const WriteOptions& /*options*/,
+                       ColumnFamilyHandle* /*column_family*/,
+                       const Slice& /*key*/, const Slice& /*value*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  using DBImpl::Delete;
+  virtual Status Delete(const WriteOptions& /*options*/,
+                        ColumnFamilyHandle* /*column_family*/,
+                        const Slice& /*key*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  virtual Status Write(const WriteOptions& /*options*/,
+                       WriteBatch* /*updates*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  using DBImpl::CompactRange;
+  virtual Status CompactRange(const CompactRangeOptions& /*options*/,
+                              ColumnFamilyHandle* /*column_family*/,
+                              const Slice* /*begin*/,
+                              const Slice* /*end*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  virtual Status DisableFileDeletions() override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  virtual Status EnableFileDeletions(bool /*force*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  virtual Status GetLiveFiles(std::vector<std::string>& ret,
+                              uint64_t* manifest_file_size,
+                              bool /*flush_memtable*/) override {
+    return DBImpl::GetLiveFiles(ret, manifest_file_size,
+                                false /* flush_memtable */);
+  }
+  using DBImpl::Flush;
+  virtual Status Flush(const FlushOptions& /*options*/,
+                       ColumnFamilyHandle* /*column_family*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  virtual Status SyncWAL() override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+  using DB::IngestExternalFile;
+  virtual Status IngestExternalFile(
+      ColumnFamilyHandle* /*column_family*/,
+      const std::vector<std::string>& /*external_files*/,
+      const IngestExternalFileOptions& /*ingestion_options*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+  using DB::CreateColumnFamilyWithImport;
+  virtual Status CreateColumnFamilyWithImport(
+      const ColumnFamilyOptions& /*options*/,
+      const std::string& /*column_family_name*/,
+      const ImportColumnFamilyOptions& /*import_options*/,
+      const ExportImportFilesMetaData& /*metadata*/,
+      ColumnFamilyHandle** /*handle*/) override {
+    return Status::NotSupported("Not supported in compacted db mode.");
+  }
+
+ private:
+  friend class DB;
+  inline size_t FindFile(const Slice& key);
+  Status Init(const Options& options);
+
+  ColumnFamilyData* cfd_;
+  Version* version_;
+  const Comparator* user_comparator_;
+  LevelFilesBrief files_;
+};
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // ROCKSDB_LITE
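Every mutating entry point in the class above is stubbed out with Status::NotSupported, so misuse fails fast instead of corrupting the single sorted run. A small sketch of what callers should expect (method names from the header above; the DB is assumed opened as in the previous example):

  #include <cassert>
  #include "rocksdb/db.h"

  // Writes against a fully compacted, read-only DB are rejected rather
  // than silently dropped; callers can branch on IsNotSupported().
  void TryWrite(rocksdb::DB* db) {
    rocksdb::Status s = db->Put(rocksdb::WriteOptions(), "key", "value");
    assert(s.IsNotSupported());

    s = db->Delete(rocksdb::WriteOptions(), "key");
    assert(s.IsNotSupported());

    s = db->Flush(rocksdb::FlushOptions());
    assert(s.IsNotSupported());
  }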
diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/db_impl.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl.cc
--- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/db_impl.cc	2025-01-30 11:01:26.000000000 +0000
+++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl.cc	2025-05-19 16:14:27.000000000 +0000
@@ -18,10 +18,10 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
-#include
 #include
 #include
@@ -45,6 +45,7 @@
 #include "db/memtable_list.h"
 #include "db/merge_context.h"
 #include "db/merge_helper.h"
+#include "db/periodic_work_scheduler.h"
 #include "db/range_tombstone_fragmenter.h"
 #include "db/table_cache.h"
 #include "db/table_properties_collector.h"
@@ -52,7 +53,7 @@
 #include "db/version_set.h"
 #include "db/write_batch_internal.h"
 #include "db/write_callback.h"
-#include "env/composite_env_wrapper.h"
+#include "env/unique_id_gen.h"
 #include "file/file_util.h"
 #include "file/filename.h"
 #include "file/random_access_file_reader.h"
@@ -60,9 +61,8 @@
 #include "logging/auto_roll_logger.h"
 #include "logging/log_buffer.h"
 #include "logging/logging.h"
-#include "memtable/hash_linklist_rep.h"
-#include "memtable/hash_skiplist_rep.h"
 #include "monitoring/in_memory_stats_history.h"
+#include "monitoring/instrumented_mutex.h"
 #include "monitoring/iostats_context_imp.h"
 #include "monitoring/perf_context_imp.h"
 #include "monitoring/persistent_stats_history.h"
@@ -82,25 +82,29 @@
 #include "rocksdb/stats_history.h"
 #include "rocksdb/status.h"
 #include "rocksdb/table.h"
+#include "rocksdb/version.h"
 #include "rocksdb/write_buffer_manager.h"
 #include "table/block_based/block.h"
 #include "table/block_based/block_based_table_factory.h"
 #include "table/get_context.h"
 #include "table/merging_iterator.h"
 #include "table/multiget_context.h"
+#include "table/sst_file_dumper.h"
 #include "table/table_builder.h"
 #include "table/two_level_iterator.h"
+#include "table/unique_id_impl.h"
 #include "test_util/sync_point.h"
-#include "tools/sst_dump_tool_imp.h"
+#include "trace_replay/trace_replay.h"
 #include "util/autovector.h"
-#include "util/build_version.h"
 #include "util/cast_util.h"
 #include "util/coding.h"
 #include "util/compression.h"
 #include "util/crc32c.h"
+#include "util/defer.h"
 #include "util/mutexlock.h"
 #include "util/stop_watch.h"
 #include "util/string_util.h"
+#include "utilities/trace/replayer_impl.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -146,26 +150,31 @@
 }  // namespace
 
 DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
-               const bool seq_per_batch, const bool batch_per_txn)
+               const bool seq_per_batch, const bool batch_per_txn,
+               bool read_only)
     : dbname_(dbname),
       own_info_log_(options.info_log == nullptr),
-      initial_db_options_(SanitizeOptions(dbname, options)),
+      initial_db_options_(SanitizeOptions(dbname, options, read_only)),
       env_(initial_db_options_.env),
-      fs_(initial_db_options_.file_system),
+      io_tracer_(std::make_shared<IOTracer>()),
       immutable_db_options_(initial_db_options_),
+      fs_(immutable_db_options_.fs, io_tracer_),
      mutable_db_options_(initial_db_options_),
-      stats_(immutable_db_options_.statistics.get()),
-      mutex_(stats_, env_, DB_MUTEX_WAIT_MICROS,
+      stats_(immutable_db_options_.stats),
+      mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS,
              immutable_db_options_.use_adaptive_mutex),
       default_cf_handle_(nullptr),
+      error_handler_(this, immutable_db_options_, &mutex_),
+      event_logger_(immutable_db_options_.info_log.get()),
       max_total_in_memory_state_(0),
       file_options_(BuildDBOptions(immutable_db_options_, mutable_db_options_)),
       file_options_for_compaction_(fs_->OptimizeForCompactionTableWrite(
           file_options_, immutable_db_options_)),
       seq_per_batch_(seq_per_batch),
       batch_per_txn_(batch_per_txn),
-      db_lock_(nullptr),
+      next_job_id_(1),
       shutting_down_(false),
+      db_lock_(nullptr),
       manual_compaction_paused_(false),
       bg_cv_(&mutex_),
       logfile_number_(0),
@@ -190,20 +199,22 @@
       bg_purge_scheduled_(0),
       disable_delete_obsolete_files_(0),
       pending_purge_obsolete_files_(0),
-      delete_obsolete_files_last_run_(env_->NowMicros()),
+      delete_obsolete_files_last_run_(immutable_db_options_.clock->NowMicros()),
       last_stats_dump_time_microsec_(0),
-      next_job_id_(1),
       has_unpersisted_data_(false),
       unable_to_release_oldest_log_(false),
       num_running_ingest_file_(0),
 #ifndef ROCKSDB_LITE
-      wal_manager_(immutable_db_options_, file_options_, seq_per_batch),
+      wal_manager_(immutable_db_options_, file_options_, io_tracer_,
+                   seq_per_batch),
 #endif  // ROCKSDB_LITE
-      event_logger_(immutable_db_options_.info_log.get()),
       bg_work_paused_(0),
       bg_compaction_paused_(0),
       refitting_level_(false),
       opened_successfully_(false),
+#ifndef ROCKSDB_LITE
+      periodic_work_scheduler_(nullptr),
+#endif  // ROCKSDB_LITE
       two_write_queues_(options.two_write_queues),
       manual_wal_flush_(options.manual_wal_flush),
       // last_sequence_ is always maintained by the main queue that also writes
@@ -225,12 +236,15 @@
       own_sfm_(options.sst_file_manager == nullptr),
       preserve_deletes_(options.preserve_deletes),
       closed_(false),
-      error_handler_(this, immutable_db_options_, &mutex_),
-      atomic_flush_install_cv_(&mutex_) {
+      atomic_flush_install_cv_(&mutex_),
+      blob_callback_(immutable_db_options_.sst_file_manager.get(), &mutex_,
+                     &error_handler_, &event_logger_,
+                     immutable_db_options_.listeners, dbname_) {
   // !batch_per_txn_ implies seq_per_batch_ because it is only unset for
   // WriteUnprepared, which should use seq_per_batch_.
   assert(batch_per_txn_ || seq_per_batch_);
-  env_->GetAbsolutePath(dbname, &db_absolute_path_);
+  // TODO: Check for an error here
+  env_->GetAbsolutePath(dbname, &db_absolute_path_).PermitUncheckedError();
 
   // Reserve ten files or so for other uses and give the rest to TableCache.
   // Give a large number for setting of "infinite" open files.
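The PermitUncheckedError() calls introduced throughout this version belong to RocksDB's ASSERT_STATUS_CHECKED discipline: in instrumented builds a Status may assert if it is destroyed without ever being examined. A reduced illustration of the pattern (hand-rolled CheckedStatus toy type, not the real rocksdb::Status):

  #include <cassert>

  // Toy model of a status object that must be looked at before it dies.
  class CheckedStatus {
   public:
    explicit CheckedStatus(bool ok) : ok_(ok) {}
    ~CheckedStatus() {
      // In debug builds, dying unchecked is treated as a bug.
      assert(checked_ && "Status destroyed without being checked");
    }
    bool ok() const {
      checked_ = true;
      return ok_;
    }
    // Explicit opt-out, mirroring Status::PermitUncheckedError().
    void PermitUncheckedError() const { checked_ = true; }

   private:
    bool ok_;
    mutable bool checked_ = false;
  };

  CheckedStatus DoSomething() { return CheckedStatus(true); }

  int main() {
    DoSomething().PermitUncheckedError();  // deliberately ignored, no assert
    CheckedStatus s = DoSomething();
    return s.ok() ? 0 : 1;                 // checked normally
  }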
@@ -242,15 +256,18 @@
   co.num_shard_bits = immutable_db_options_.table_cache_numshardbits;
   co.metadata_charge_policy = kDontChargeCacheMetadata;
   table_cache_ = NewLRUCache(co);
+  SetDbSessionId();
+  assert(!db_session_id_.empty());
 
   versions_.reset(new VersionSet(dbname_, &immutable_db_options_, file_options_,
                                  table_cache_.get(), write_buffer_manager_,
-                                 &write_controller_, &block_cache_tracer_));
+                                 &write_controller_, &block_cache_tracer_,
+                                 io_tracer_, db_session_id_));
   column_family_memtables_.reset(
       new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet()));
 
   DumpRocksDBBuildVersion(immutable_db_options_.info_log.get());
-  DumpDBFileSummary(immutable_db_options_, dbname_);
+  DumpDBFileSummary(immutable_db_options_, dbname_, db_session_id_);
   immutable_db_options_.Dump(immutable_db_options_.info_log.get());
   mutable_db_options_.Dump(immutable_db_options_.info_log.get());
   DumpSupportInfo(immutable_db_options_.info_log.get());
@@ -259,6 +276,10 @@
   // we won't drop any deletion markers until SetPreserveDeletesSequenceNumber()
   // is called by client and this seqnum is advanced.
   preserve_deletes_seqnum_.store(0);
+
+  if (write_buffer_manager_) {
+    wbm_stall_.reset(new WBMStallInterface());
+  }
 }
 
 Status DBImpl::Resume() {
@@ -294,22 +315,59 @@
 // 4. Schedule compactions if needed for all the CFs. This is needed as the
 //    flush in the prior step might have been a no-op for some CFs, which
 //    means a new super version wouldn't have been installed
-Status DBImpl::ResumeImpl() {
+Status DBImpl::ResumeImpl(DBRecoverContext context) {
   mutex_.AssertHeld();
   WaitForBackgroundWork();
 
-  Status bg_error = error_handler_.GetBGError();
   Status s;
   if (shutdown_initiated_) {
     // Returning shutdown status to SFM during auto recovery will cause it
     // to abort the recovery and allow the shutdown to progress
     s = Status::ShutdownInProgress();
   }
-  if (s.ok() && bg_error.severity() > Status::Severity::kHardError) {
-    ROCKS_LOG_INFO(
-        immutable_db_options_.info_log,
-        "DB resume requested but failed due to Fatal/Unrecoverable error");
-    s = bg_error;
+
+  if (s.ok()) {
+    Status bg_error = error_handler_.GetBGError();
+    if (bg_error.severity() > Status::Severity::kHardError) {
+      ROCKS_LOG_INFO(
+          immutable_db_options_.info_log,
+          "DB resume requested but failed due to Fatal/Unrecoverable error");
+      s = bg_error;
+    }
+  }
+
+  // Make sure the IO Status stored in version set is set to OK.
+  bool file_deletion_disabled = !IsFileDeletionsEnabled();
+  if (s.ok()) {
+    IOStatus io_s = versions_->io_status();
+    if (io_s.IsIOError()) {
+      // If resuming from IOError resulted from MANIFEST write, then assert
+      // that we must have already set the MANIFEST writer to nullptr during
+      // clean-up phase MANIFEST writing. We must have also disabled file
+      // deletions.
+      assert(!versions_->descriptor_log_);
+      assert(file_deletion_disabled);
+      // Since we are trying to recover from MANIFEST write error, we need to
+      // switch to a new MANIFEST anyway. The old MANIFEST can be corrupted.
+      // Therefore, force writing a dummy version edit because we do not know
+      // whether there are flush jobs with non-empty data to flush, triggering
+      // appends to MANIFEST.
+      VersionEdit edit;
+      auto cfh =
+          static_cast_with_check<ColumnFamilyHandleImpl>(default_cf_handle_);
+      assert(cfh);
+      ColumnFamilyData* cfd = cfh->cfd();
+      const MutableCFOptions& cf_opts = *cfd->GetLatestMutableCFOptions();
+      s = versions_->LogAndApply(cfd, cf_opts, &edit, &mutex_,
+                                 directories_.GetDbDir());
+      if (!s.ok()) {
+        io_s = versions_->io_status();
+        if (!io_s.ok()) {
+          s = error_handler_.SetBGError(io_s,
+                                        BackgroundErrorReason::kManifestWrite);
+        }
+      }
+    }
   }
 
   // We cannot guarantee consistency of the WAL. So force flush Memtables of
@@ -322,18 +380,15 @@
     autovector<ColumnFamilyData*> cfds;
     SelectColumnFamiliesForAtomicFlush(&cfds);
     mutex_.Unlock();
-    s = AtomicFlushMemTables(cfds, flush_opts, FlushReason::kErrorRecovery);
+    s = AtomicFlushMemTables(cfds, flush_opts, context.flush_reason);
     mutex_.Lock();
   } else {
-    for (auto cfd : *versions_->GetColumnFamilySet()) {
+    for (auto cfd : versions_->GetRefedColumnFamilySet()) {
       if (cfd->IsDropped()) {
         continue;
       }
-      cfd->Ref();
-      mutex_.Unlock();
-      s = FlushMemTable(cfd, flush_opts, FlushReason::kErrorRecovery);
-      mutex_.Lock();
-      cfd->UnrefAndTryDelete();
+      InstrumentedMutexUnlock u(&mutex_);
+      s = FlushMemTable(cfd, flush_opts, context.flush_reason);
       if (!s.ok()) {
         break;
       }
@@ -348,9 +403,6 @@
 
   JobContext job_context(0);
   FindObsoleteFiles(&job_context, true);
-  if (s.ok()) {
-    s = error_handler_.ClearBGError();
-  }
   mutex_.Unlock();
 
   job_context.manifest_file_number = 1;
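The InstrumentedMutexUnlock helper introduced in the flush loop above is the inverse of a lock guard: it releases a held mutex for the scope and re-acquires it on exit, keeping the Unlock/Lock pairing exception- and early-return-safe. A generic sketch of the same idea with std::mutex (the real class also feeds lock-wait statistics):

  #include <chrono>
  #include <mutex>
  #include <thread>

  // RAII "unlock guard": releases a held mutex for the current scope and
  // re-acquires it when the scope exits, even on early return or throw.
  class ScopedUnlock {
   public:
    explicit ScopedUnlock(std::mutex* mu) : mu_(mu) { mu_->unlock(); }
    ~ScopedUnlock() { mu_->lock(); }
    ScopedUnlock(const ScopedUnlock&) = delete;
    ScopedUnlock& operator=(const ScopedUnlock&) = delete;

   private:
    std::mutex* mu_;
  };

  std::mutex g_mu;

  // Stand-in for a slow operation that must run without the lock held.
  void FlushOneThing() {
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
  }

  void FlushAllUnderLock() {
    std::lock_guard<std::mutex> l(g_mu);
    for (int i = 0; i < 3; ++i) {
      ScopedUnlock u(&g_mu);  // drop the lock just for the slow call
      FlushOneThing();
    }  // lock re-acquired here before the next iteration
  }

  int main() {
    FlushAllUnderLock();
    return 0;
  }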
@@ -360,9 +412,42 @@
   job_context.Clean();
 
   if (s.ok()) {
-    ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
+    assert(versions_->io_status().ok());
+    // If we reach here, we should re-enable file deletions if it was disabled
+    // during previous error handling.
+    if (file_deletion_disabled) {
+      // Always return ok
+      s = EnableFileDeletions(/*force=*/true);
+      if (!s.ok()) {
+        ROCKS_LOG_INFO(
+            immutable_db_options_.info_log,
+            "DB resume requested but could not enable file deletions [%s]",
+            s.ToString().c_str());
+        assert(false);
+      }
+    }
   }
+
+  mutex_.Lock();
+  if (s.ok()) {
+    // This will notify and unblock threads waiting for error recovery to
+    // finish. Those previously waiting threads can now proceed, which may
+    // include closing the db.
+    s = error_handler_.ClearBGError();
+  } else {
+    // NOTE: this is needed to pass ASSERT_STATUS_CHECKED
+    // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test.
+    // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952
+    error_handler_.GetRecoveryError().PermitUncheckedError();
+  }
+
+  if (s.ok()) {
+    ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
+  } else {
+    ROCKS_LOG_INFO(immutable_db_options_.info_log, "Failed to resume DB [%s]",
+                   s.ToString().c_str());
+  }
+
   // Check for shutdown again before scheduling further compactions,
   // since we released and re-acquired the lock above
   if (shutdown_initiated_) {
@@ -396,14 +481,12 @@
   ROCKS_LOG_INFO(immutable_db_options_.info_log,
                  "Shutdown: canceling all background work");
 
-  if (thread_dump_stats_ != nullptr) {
-    thread_dump_stats_->cancel();
-    thread_dump_stats_.reset();
-  }
-  if (thread_persist_stats_ != nullptr) {
-    thread_persist_stats_->cancel();
-    thread_persist_stats_.reset();
+#ifndef ROCKSDB_LITE
+  if (periodic_work_scheduler_ != nullptr) {
+    periodic_work_scheduler_->Unregister(this);
   }
+#endif  // !ROCKSDB_LITE
+
   InstrumentedMutexLock l(&mutex_);
   if (!shutting_down_.load(std::memory_order_acquire) &&
       has_unpersisted_data_.load(std::memory_order_relaxed) &&
@@ -412,20 +495,19 @@
     autovector<ColumnFamilyData*> cfds;
     SelectColumnFamiliesForAtomicFlush(&cfds);
     mutex_.Unlock();
-    AtomicFlushMemTables(cfds, FlushOptions(), FlushReason::kShutDown);
+    Status s =
+        AtomicFlushMemTables(cfds, FlushOptions(), FlushReason::kShutDown);
+    s.PermitUncheckedError();  //**TODO: What to do on error?
     mutex_.Lock();
   } else {
-    for (auto cfd : *versions_->GetColumnFamilySet()) {
+    for (auto cfd : versions_->GetRefedColumnFamilySet()) {
       if (!cfd->IsDropped() && cfd->initialized() && !cfd->mem()->IsEmpty()) {
-        cfd->Ref();
-        mutex_.Unlock();
-        FlushMemTable(cfd, FlushOptions(), FlushReason::kShutDown);
-        mutex_.Lock();
-        cfd->UnrefAndTryDelete();
+        InstrumentedMutexUnlock u(&mutex_);
+        Status s = FlushMemTable(cfd, FlushOptions(), FlushReason::kShutDown);
+        s.PermitUncheckedError();  //**TODO: What to do on error?
       }
     }
-    versions_->GetColumnFamilySet()->FreeDeadColumnFamilies();
   }
 
   shutting_down_.store(true, std::memory_order_release);
@@ -447,19 +529,29 @@
   }
   mutex_.Unlock();
 
+  // Below check is added as recovery_error_ is not checked and it causes crash
+  // in DBSSTTest.DBWithMaxSpaceAllowedWithBlobFiles when space limit is
+  // reached.
+  error_handler_.GetRecoveryError().PermitUncheckedError();
+
   // CancelAllBackgroundWork called with false means we just set the shutdown
  // marker. After this we do a variant of the waiting and unschedule work
   // (to consider: moving all the waiting into CancelAllBackgroundWork(true))
   CancelAllBackgroundWork(false);
-  int bottom_compactions_unscheduled =
-      env_->UnSchedule(this, Env::Priority::BOTTOM);
-  int compactions_unscheduled = env_->UnSchedule(this, Env::Priority::LOW);
-  int flushes_unscheduled = env_->UnSchedule(this, Env::Priority::HIGH);
-  Status ret;
+
+  // Cancel manual compaction if there's any
+  if (HasPendingManualCompaction()) {
+    DisableManualCompaction();
+  }
   mutex_.Lock();
-  bg_bottom_compaction_scheduled_ -= bottom_compactions_unscheduled;
-  bg_compaction_scheduled_ -= compactions_unscheduled;
-  bg_flush_scheduled_ -= flushes_unscheduled;
+  // Unschedule all tasks for this DB
+  for (uint8_t i = 0; i < static_cast<uint8_t>(TaskType::kCount); i++) {
+    env_->UnSchedule(GetTaskTag(i), Env::Priority::BOTTOM);
+    env_->UnSchedule(GetTaskTag(i), Env::Priority::LOW);
+    env_->UnSchedule(GetTaskTag(i), Env::Priority::HIGH);
+  }
+
+  Status ret = Status::OK();
 
   // Wait for background work to finish
   while (bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
@@ -475,12 +567,45 @@
   flush_scheduler_.Clear();
   trim_history_scheduler_.Clear();
 
+  // For now, simply trigger a manual flush at close time
+  // on all the column families.
+  // TODO(bjlemaire): Check if this is needed. Also, in the
+  // future we can contemplate doing a more fine-grained
+  // flushing by first checking if there is a need for
+  // flushing (but need to implement something
+  // else than imm()->IsFlushPending() because the output
+  // memtables added to imm() don't trigger flushes).
+  if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
+    Status flush_ret;
+    mutex_.Unlock();
+    for (ColumnFamilyData* cf : *versions_->GetColumnFamilySet()) {
+      if (immutable_db_options_.atomic_flush) {
+        flush_ret = AtomicFlushMemTables({cf}, FlushOptions(),
+                                         FlushReason::kManualFlush);
+        if (!flush_ret.ok()) {
+          ROCKS_LOG_INFO(
+              immutable_db_options_.info_log,
+              "Atomic flush memtables failed upon closing (mempurge).");
+        }
+      } else {
+        flush_ret =
+            FlushMemTable(cf, FlushOptions(), FlushReason::kManualFlush);
+        if (!flush_ret.ok()) {
+          ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                         "Flush memtables failed upon closing (mempurge).");
+        }
+      }
+    }
+    mutex_.Lock();
+  }
+
   while (!flush_queue_.empty()) {
     const FlushRequest& flush_req = PopFirstFromFlushQueue();
     for (const auto& iter : flush_req) {
       iter.first->UnrefAndTryDelete();
     }
   }
+
   while (!compaction_queue_.empty()) {
     auto cfd = PopFirstFromCompactionQueue();
     cfd->UnrefAndTryDelete();
@@ -533,7 +658,7 @@
         ROCKS_LOG_WARN(
             immutable_db_options_.info_log,
             "Unable to Sync WAL file %s with error -- %s",
-            LogFileName(immutable_db_options_.wal_dir, log_number).c_str(),
+            LogFileName(immutable_db_options_.GetWalDir(), log_number).c_str(),
             s.ToString().c_str());
         // Retain the first error
         if (ret.ok()) {
@@ -567,7 +692,8 @@
   versions_.reset();
   mutex_.Unlock();
   if (db_lock_ != nullptr) {
-    env_->UnlockFile(db_lock_);
+    // TODO: Check for unlock error
+    env_->UnlockFile(db_lock_).PermitUncheckedError();
   }
 
   ROCKS_LOG_INFO(immutable_db_options_.info_log, "Shutdown complete");
@@ -586,11 +712,15 @@
 
   if (immutable_db_options_.info_log && own_info_log_) {
     Status s = immutable_db_options_.info_log->Close();
-    if (ret.ok()) {
+    if (!s.ok() && !s.IsNotSupported() && ret.ok()) {
       ret = s;
     }
   }
 
+  if (write_buffer_manager_ && wbm_stall_) {
+    write_buffer_manager_->RemoveDBFromQueue(wbm_stall_.get());
+  }
+
   if (ret.IsAborted()) {
    // Reserve IsAborted() error for those where users didn't release
    // certain resource and they can release them and come back and
@@ -603,9 +733,11 @@
 Status DBImpl::CloseImpl() { return CloseHelper(); }
 
 DBImpl::~DBImpl() {
+  InstrumentedMutexLock closing_lock_guard(&closing_mutex_);
   if (!closed_) {
     closed_ = true;
-    CloseHelper();
+    closing_status_ = CloseHelper();
+    closing_status_.PermitUncheckedError();
   }
 }
 
@@ -620,44 +752,48 @@
 }
 
 const Status DBImpl::CreateArchivalDirectory() {
-  if (immutable_db_options_.wal_ttl_seconds > 0 ||
-      immutable_db_options_.wal_size_limit_mb > 0) {
-    std::string archivalPath = ArchivalDirectory(immutable_db_options_.wal_dir);
+  if (immutable_db_options_.WAL_ttl_seconds > 0 ||
+      immutable_db_options_.WAL_size_limit_MB > 0) {
+    std::string archivalPath =
+        ArchivalDirectory(immutable_db_options_.GetWalDir());
     return env_->CreateDirIfMissing(archivalPath);
   }
   return Status::OK();
 }
 
 void DBImpl::PrintStatistics() {
-  auto dbstats = immutable_db_options_.statistics.get();
+  auto dbstats = immutable_db_options_.stats;
   if (dbstats) {
     ROCKS_LOG_INFO(immutable_db_options_.info_log, "STATISTICS:\n %s",
                    dbstats->ToString().c_str());
   }
 }
 
-void DBImpl::StartTimedTasks() {
-  unsigned int stats_dump_period_sec = 0;
-  unsigned int stats_persist_period_sec = 0;
+void DBImpl::StartPeriodicWorkScheduler() {
+#ifndef ROCKSDB_LITE
+
+#ifndef NDEBUG
+  // It is only used by tests to disable the scheduler
+  bool disable_scheduler = false;
+  TEST_SYNC_POINT_CALLBACK(
+      "DBImpl::StartPeriodicWorkScheduler:DisableScheduler",
+      &disable_scheduler);
+  if (disable_scheduler) {
+    return;
+  }
+#endif  // !NDEBUG
+
   {
     InstrumentedMutexLock l(&mutex_);
-    stats_dump_period_sec = mutable_db_options_.stats_dump_period_sec;
-    if (stats_dump_period_sec > 0) {
-      if (!thread_dump_stats_) {
-        thread_dump_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread(
-            [this]() { DBImpl::DumpStats(); }, "dump_st", env_,
-            static_cast<uint64_t>(stats_dump_period_sec) * kMicrosInSecond));
-      }
-    }
-    stats_persist_period_sec = mutable_db_options_.stats_persist_period_sec;
-    if (stats_persist_period_sec > 0) {
-      if (!thread_persist_stats_) {
-        thread_persist_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread(
-            [this]() { DBImpl::PersistStats(); }, "pst_st", env_,
-            static_cast<uint64_t>(stats_persist_period_sec) * kMicrosInSecond));
-      }
-    }
+    periodic_work_scheduler_ = PeriodicWorkScheduler::Default();
+    TEST_SYNC_POINT_CALLBACK("DBImpl::StartPeriodicWorkScheduler:Init",
+                             &periodic_work_scheduler_);
   }
+
+  periodic_work_scheduler_->Register(
+      this, mutable_db_options_.stats_dump_period_sec,
+      mutable_db_options_.stats_persist_period_sec);
+#endif  // !ROCKSDB_LITE
 }
 
 // estimate the total size of stats_history_
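The hunk above replaces the two per-DB RepeatableThread instances with a process-wide PeriodicWorkScheduler that multiplexes the periodic stats work of all open DBs onto one timer thread. A condensed sketch of the idea (plain std::thread, hypothetical TinyScheduler type, not the actual scheduler API):

  #include <atomic>
  #include <chrono>
  #include <functional>
  #include <mutex>
  #include <thread>
  #include <vector>

  // One background thread shared by all registrants, in the spirit of
  // PeriodicWorkScheduler: each task runs roughly every period_sec seconds.
  class TinyScheduler {
   public:
    void Register(std::function<void()> task, unsigned period_sec) {
      std::lock_guard<std::mutex> l(mu_);
      tasks_.push_back({std::move(task), period_sec, 0});
      if (!running_.exchange(true)) {
        thread_ = std::thread([this] { Run(); });
      }
    }
    ~TinyScheduler() {
      running_ = false;
      if (thread_.joinable()) thread_.join();
    }

   private:
    struct Entry {
      std::function<void()> task;
      unsigned period_sec;
      unsigned elapsed;
    };
    void Run() {
      while (running_) {
        std::this_thread::sleep_for(std::chrono::seconds(1));
        std::lock_guard<std::mutex> l(mu_);
        for (auto& e : tasks_) {
          if (++e.elapsed >= e.period_sec) {
            e.elapsed = 0;
            e.task();  // e.g. a DB's DumpStats or PersistStats
          }
        }
      }
    }
    std::mutex mu_;
    std::vector<Entry> tasks_;
    std::atomic<bool> running_{false};
    std::thread thread_;
  };

  int main() {
    std::atomic<int> dumps{0};
    TinyScheduler sched;
    sched.Register([&dumps] { ++dumps; }, /*period_sec=*/1);
    std::this_thread::sleep_for(std::chrono::seconds(2));
    return dumps.load() >= 1 ? 0 : 1;
  }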
@@ -683,8 +819,11 @@
   if (shutdown_initiated_) {
     return;
   }
-  uint64_t now_seconds = env_->NowMicros() / kMicrosInSecond;
-  Statistics* statistics = immutable_db_options_.statistics.get();
+  TEST_SYNC_POINT("DBImpl::PersistStats:StartRunning");
+  uint64_t now_seconds =
+      immutable_db_options_.clock->NowMicros() / kMicrosInSecond;
+
+  Statistics* statistics = immutable_db_options_.stats;
   if (!statistics) {
     return;
   }
@@ -703,29 +842,34 @@
 
   if (immutable_db_options_.persist_stats_to_disk) {
     WriteBatch batch;
+    Status s = Status::OK();
     if (stats_slice_initialized_) {
       ROCKS_LOG_INFO(immutable_db_options_.info_log,
                      "Reading %" ROCKSDB_PRIszt " stats from statistics\n",
                      stats_slice_.size());
       for (const auto& stat : stats_map) {
-        char key[100];
-        int length =
-            EncodePersistentStatsKey(now_seconds, stat.first, 100, key);
-        // calculate the delta from last time
-        if (stats_slice_.find(stat.first) != stats_slice_.end()) {
-          uint64_t delta = stat.second - stats_slice_[stat.first];
-          batch.Put(persist_stats_cf_handle_, Slice(key, std::min(100, length)),
-                    ToString(delta));
+        if (s.ok()) {
+          char key[100];
+          int length =
+              EncodePersistentStatsKey(now_seconds, stat.first, 100, key);
+          // calculate the delta from last time
+          if (stats_slice_.find(stat.first) != stats_slice_.end()) {
+            uint64_t delta = stat.second - stats_slice_[stat.first];
+            s = batch.Put(persist_stats_cf_handle_,
+                          Slice(key, std::min(100, length)), ToString(delta));
+          }
         }
       }
     }
     stats_slice_initialized_ = true;
     std::swap(stats_slice_, stats_map);
-    WriteOptions wo;
-    wo.low_pri = true;
-    wo.no_slowdown = true;
-    wo.sync = false;
-    Status s = Write(wo, &batch);
+    if (s.ok()) {
+      WriteOptions wo;
+      wo.low_pri = true;
+      wo.no_slowdown = true;
+      wo.sync = false;
+      s = Write(wo, &batch);
+    }
     if (!s.ok()) {
       ROCKS_LOG_INFO(immutable_db_options_.info_log,
                      "Writing to persistent stats CF failed -- %s",
@@ -774,6 +918,7 @@
                  " bytes, slice count: %" ROCKSDB_PRIszt,
                  stats_history_size, stats_history_.size());
   }
+  TEST_SYNC_POINT("DBImpl::PersistStats:End");
 #endif  // !ROCKSDB_LITE
 }
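PersistStats above stores deltas, not absolute counter values: each run subtracts the previous snapshot (stats_slice_) from the current counters before writing the batch, then swaps the snapshot forward. A minimal sketch of that bookkeeping with plain maps (hypothetical names; the real code additionally encodes a timestamped key per counter):

  #include <cstdint>
  #include <iostream>
  #include <map>
  #include <string>
  #include <utility>

  // Computes per-counter deltas against the previous snapshot and then
  // replaces the snapshot, mirroring the stats_slice_ logic above.
  std::map<std::string, uint64_t> TakeDeltas(
      std::map<std::string, uint64_t>& prev_snapshot,
      std::map<std::string, uint64_t> current) {
    std::map<std::string, uint64_t> deltas;
    for (const auto& kv : current) {
      auto it = prev_snapshot.find(kv.first);
      if (it != prev_snapshot.end()) {
        deltas[kv.first] = kv.second - it->second;  // counters are monotonic
      }
    }
    std::swap(prev_snapshot, current);  // current becomes the new snapshot
    return deltas;
  }

  int main() {
    std::map<std::string, uint64_t> snapshot = {{"reads", 100}};
    auto deltas = TakeDeltas(snapshot, {{"reads", 140}, {"writes", 7}});
    std::cout << deltas["reads"] << std::endl;  // prints 40
    return 0;
  }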
@@ -817,31 +962,50 @@
 void DBImpl::DumpStats() {
   TEST_SYNC_POINT("DBImpl::DumpStats:1");
 #ifndef ROCKSDB_LITE
-  const DBPropertyInfo* cf_property_info =
-      GetPropertyInfo(DB::Properties::kCFStats);
-  assert(cf_property_info != nullptr);
-  const DBPropertyInfo* db_property_info =
-      GetPropertyInfo(DB::Properties::kDBStats);
-  assert(db_property_info != nullptr);
-
   std::string stats;
   if (shutdown_initiated_) {
     return;
   }
+
+  TEST_SYNC_POINT("DBImpl::DumpStats:StartRunning");
   {
     InstrumentedMutexLock l(&mutex_);
-    default_cf_internal_stats_->GetStringProperty(
-        *db_property_info, DB::Properties::kDBStats, &stats);
+    for (auto cfd : versions_->GetRefedColumnFamilySet()) {
+      if (cfd->initialized()) {
+        // Release DB mutex for gathering cache entry stats. Pass over all
+        // column families for this first so that other stats are dumped
+        // near-atomically.
+        InstrumentedMutexUnlock u(&mutex_);
+        cfd->internal_stats()->CollectCacheEntryStats(/*foreground=*/false);
+      }
+    }
+
+    const std::string* property = &DB::Properties::kDBStats;
+    const DBPropertyInfo* property_info = GetPropertyInfo(*property);
+    assert(property_info != nullptr);
+    assert(!property_info->need_out_of_mutex);
+    default_cf_internal_stats_->GetStringProperty(*property_info, *property,
+                                                  &stats);
+
+    property = &DB::Properties::kCFStatsNoFileHistogram;
+    property_info = GetPropertyInfo(*property);
+    assert(property_info != nullptr);
+    assert(!property_info->need_out_of_mutex);
     for (auto cfd : *versions_->GetColumnFamilySet()) {
       if (cfd->initialized()) {
-        cfd->internal_stats()->GetStringProperty(
-            *cf_property_info, DB::Properties::kCFStatsNoFileHistogram, &stats);
+        cfd->internal_stats()->GetStringProperty(*property_info, *property,
+                                                 &stats);
       }
     }
+
+    property = &DB::Properties::kCFFileHistogram;
+    property_info = GetPropertyInfo(*property);
+    assert(property_info != nullptr);
+    assert(!property_info->need_out_of_mutex);
     for (auto cfd : *versions_->GetColumnFamilySet()) {
       if (cfd->initialized()) {
-        cfd->internal_stats()->GetStringProperty(
-            *cf_property_info, DB::Properties::kCFFileHistogram, &stats);
+        cfd->internal_stats()->GetStringProperty(*property_info, *property,
+                                                 &stats);
       }
     }
   }
@@ -863,12 +1027,18 @@
   PrintStatistics();
 }
 
+void DBImpl::FlushInfoLog() {
+  if (shutdown_initiated_) {
+    return;
+  }
+  TEST_SYNC_POINT("DBImpl::FlushInfoLog:StartRunning");
+  LogFlush(immutable_db_options_.info_log);
+}
+
 Status DBImpl::TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family,
                                            int max_entries_to_print,
                                            std::string* out_str) {
-  auto* cfh =
-      static_cast_with_check<ColumnFamilyHandleImpl, ColumnFamilyHandle>(
-          column_family);
+  auto* cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
   ColumnFamilyData* cfd = cfh->cfd();
 
   SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
@@ -890,9 +1060,9 @@
   }
 }
 
-Directory* DBImpl::GetDataDir(ColumnFamilyData* cfd, size_t path_id) const {
+FSDirectory* DBImpl::GetDataDir(ColumnFamilyData* cfd, size_t path_id) const {
   assert(cfd);
-  Directory* ret_dir = cfd->GetDataDir(path_id);
+  FSDirectory* ret_dir = cfd->GetDataDir(path_id);
   if (ret_dir == nullptr) {
     return directories_.GetDataDir(path_id);
   }
@@ -907,7 +1077,8 @@
   (void)options_map;
   return Status::NotSupported("Not supported in ROCKSDB LITE");
 #else
-  auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
+  auto* cfd =
+      static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
   if (options_map.empty()) {
     ROCKS_LOG_WARN(immutable_db_options_.info_log,
                    "SetOptions() on column family [%s], empty input",
@@ -918,6 +1089,7 @@
   MutableCFOptions new_options;
   Status s;
   Status persist_options_status;
+  persist_options_status.PermitUncheckedError();  // Allow uninitialized access
   SuperVersionContext sv_context(/* create_superversion */ true);
   {
     auto db_options = GetDBOptions();
@@ -927,8 +1099,8 @@
       new_options = *cfd->GetLatestMutableCFOptions();
       // Append new version to recompute compaction score.
      VersionEdit dummy_edit;
-      versions_->LogAndApply(cfd, new_options, &dummy_edit, &mutex_,
-                             directories_.GetDbDir());
+      s = versions_->LogAndApply(cfd, new_options, &dummy_edit, &mutex_,
+                                 directories_.GetDbDir());
      // Trigger possible flush/compactions. This has to be before we persist
      // options to file, otherwise there will be a deadlock with writer
      // thread.
@@ -978,16 +1150,26 @@
 
   MutableDBOptions new_options;
   Status s;
-  Status persist_options_status;
+  Status persist_options_status = Status::OK();
   bool wal_changed = false;
   WriteContext write_context;
   {
     InstrumentedMutexLock l(&mutex_);
     s = GetMutableDBOptionsFromStrings(mutable_db_options_, options_map,
                                        &new_options);
+
     if (new_options.bytes_per_sync == 0) {
       new_options.bytes_per_sync = 1024 * 1024;
     }
+
+    if (MutableDBOptionsAreEqual(mutable_db_options_, new_options)) {
+      ROCKS_LOG_INFO(immutable_db_options_.info_log,
+                     "SetDBOptions(), input option value is not changed, "
+                     "skipping updating.");
+      persist_options_status.PermitUncheckedError();
+      return s;
+    }
+
     DBOptions new_db_options =
         BuildDBOptions(immutable_db_options_, new_options);
     if (s.ok()) {
@@ -1006,12 +1188,12 @@
     }
     if (s.ok()) {
       const BGJobLimits current_bg_job_limits =
-          GetBGJobLimits(immutable_db_options_.max_background_flushes,
+          GetBGJobLimits(mutable_db_options_.max_background_flushes,
                          mutable_db_options_.max_background_compactions,
                          mutable_db_options_.max_background_jobs,
                          /* parallelize_compactions */ true);
       const BGJobLimits new_bg_job_limits = GetBGJobLimits(
-          immutable_db_options_.max_background_flushes,
+          new_options.max_background_flushes,
           new_options.max_background_compactions,
           new_options.max_background_jobs,
           /* parallelize_compactions */ true);
@@ -1036,36 +1218,15 @@
       }
 
       if (new_options.stats_dump_period_sec !=
-          mutable_db_options_.stats_dump_period_sec) {
-        if (thread_dump_stats_) {
-          mutex_.Unlock();
-          thread_dump_stats_->cancel();
-          mutex_.Lock();
-        }
-        if (new_options.stats_dump_period_sec > 0) {
-          thread_dump_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread(
-              [this]() { DBImpl::DumpStats(); }, "dump_st", env_,
-              static_cast<uint64_t>(new_options.stats_dump_period_sec) *
-                  kMicrosInSecond));
-        } else {
-          thread_dump_stats_.reset();
-        }
-      }
-      if (new_options.stats_persist_period_sec !=
-          mutable_db_options_.stats_persist_period_sec) {
-        if (thread_persist_stats_) {
-          mutex_.Unlock();
-          thread_persist_stats_->cancel();
-          mutex_.Lock();
-        }
-        if (new_options.stats_persist_period_sec > 0) {
-          thread_persist_stats_.reset(new ROCKSDB_NAMESPACE::RepeatableThread(
-              [this]() { DBImpl::PersistStats(); }, "pst_st", env_,
-              static_cast<uint64_t>(new_options.stats_persist_period_sec) *
-                  kMicrosInSecond));
-        } else {
-          thread_persist_stats_.reset();
-        }
+              mutable_db_options_.stats_dump_period_sec ||
+          new_options.stats_persist_period_sec !=
+              mutable_db_options_.stats_persist_period_sec) {
+        mutex_.Unlock();
+        periodic_work_scheduler_->Unregister(this);
+        periodic_work_scheduler_->Register(
+            this, new_options.stats_dump_period_sec,
+            new_options.stats_persist_period_sec);
+        mutex_.Lock();
       }
       write_controller_.set_max_delayed_write_rate(
           new_options.delayed_write_rate);
@@ -1097,6 +1258,10 @@
       persist_options_status = WriteOptionsFile(
           false /*need_mutex_lock*/, false /*need_enter_write_thread*/);
       write_thread_.ExitUnbatched(&w);
+    } else {
+      // To get here, we must have had invalid options and will not attempt to
+      // persist the options, which means the status is "OK/Uninitialized".
+      persist_options_status.PermitUncheckedError();
+    }
   }
   ROCKS_LOG_INFO(immutable_db_options_.info_log, "SetDBOptions(), inputs:");
@@ -1147,25 +1312,25 @@
 
 Status DBImpl::FlushWAL(bool sync) {
   if (manual_wal_flush_) {
-    Status s;
+    IOStatus io_s;
     {
       // We need to lock log_write_mutex_ since logs_ might change concurrently
       InstrumentedMutexLock wl(&log_write_mutex_);
       log::Writer* cur_log_writer = logs_.back().writer;
-      s = cur_log_writer->WriteBuffer();
+      io_s = cur_log_writer->WriteBuffer();
     }
-    if (!s.ok()) {
+    if (!io_s.ok()) {
       ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s",
-                      s.ToString().c_str());
+                      io_s.ToString().c_str());
       // In case there is a fs error we should set it globally to prevent the
       // future writes
-      WriteStatusCheck(s);
+      IOStatusCheck(io_s);
       // whether sync or not, we should abort the rest of function upon error
-      return s;
+      return std::move(io_s);
     }
     if (!sync) {
       ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "FlushWAL sync=false");
-      return s;
+      return std::move(io_s);
     }
   }
   if (!sync) {
@@ -1217,21 +1382,36 @@
   TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:1");
 
   RecordTick(stats_, WAL_FILE_SYNCED);
   Status status;
+  IOStatus io_s;
   for (log::Writer* log : logs_to_sync) {
-    status = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync);
-    if (!status.ok()) {
+    io_s = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync);
+    if (!io_s.ok()) {
+      status = io_s;
       break;
     }
   }
+  if (!io_s.ok()) {
+    ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL Sync error %s",
+                    io_s.ToString().c_str());
+    // In case there is a fs error we should set it globally to prevent the
+    // future writes
+    IOStatusCheck(io_s);
+  }
   if (status.ok() && need_log_dir_sync) {
-    status = directories_.GetWalDir()->Fsync();
+    status = directories_.GetWalDir()->FsyncWithDirOptions(
+        IOOptions(), nullptr,
+        DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
   }
   TEST_SYNC_POINT("DBWALTest::SyncWALNotWaitWrite:2");
 
   TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:1");
   {
     InstrumentedMutexLock l(&mutex_);
-    MarkLogsSynced(current_log_number, need_log_dir_sync, status);
+    if (status.ok()) {
+      status = MarkLogsSynced(current_log_number, need_log_dir_sync);
+    } else {
+      MarkLogsNotSynced(current_log_number);
+    }
   }
   TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:2");
 
@@ -1249,7 +1429,7 @@
     // future writes
     WriteStatusCheck(status);
   }
-  return status;
+  return std::move(status);
 }
 
 Status DBImpl::UnlockWAL() {
@@ -1257,27 +1437,54 @@
   return Status::OK();
 }
 
-void DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir,
-                            const Status& status) {
+Status DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir) {
   mutex_.AssertHeld();
-  if (synced_dir && logfile_number_ == up_to && status.ok()) {
+  if (synced_dir && logfile_number_ == up_to) {
     log_dir_synced_ = true;
   }
+  VersionEdit synced_wals;
   for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;) {
-    auto& log = *it;
-    assert(log.getting_synced);
-    if (status.ok() && logs_.size() > 1) {
-      logs_to_free_.push_back(log.ReleaseWriter());
+    auto& wal = *it;
+    assert(wal.getting_synced);
+    if (logs_.size() > 1) {
+      if (immutable_db_options_.track_and_verify_wals_in_manifest &&
+          wal.writer->file()->GetFileSize() > 0) {
+        synced_wals.AddWal(wal.number,
+                           WalMetadata(wal.writer->file()->GetFileSize()));
+      }
+      logs_to_free_.push_back(wal.ReleaseWriter());
       // To modify logs_ both mutex_ and log_write_mutex_ must be held
       InstrumentedMutexLock l(&log_write_mutex_);
       it = logs_.erase(it);
    } else {
-      log.getting_synced = false;
+      wal.getting_synced = false;
       ++it;
     }
   }
-  assert(!status.ok() || logs_.empty() || logs_[0].number > up_to ||
+  assert(logs_.empty() || logs_[0].number > up_to ||
          (logs_.size() == 1 && !logs_[0].getting_synced));
+
+  Status s;
+  if (synced_wals.IsWalAddition()) {
+    // not empty, write to MANIFEST.
+    s = versions_->LogAndApplyToDefaultColumnFamily(&synced_wals, &mutex_);
+    if (!s.ok() && versions_->io_status().IsIOError()) {
+      s = error_handler_.SetBGError(versions_->io_status(),
+                                    BackgroundErrorReason::kManifestWrite);
+    }
+  }
+  log_sync_cv_.SignalAll();
+  return s;
+}
+
+void DBImpl::MarkLogsNotSynced(uint64_t up_to) {
+  mutex_.AssertHeld();
+  for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;
+       ++it) {
+    auto& wal = *it;
+    assert(wal.getting_synced);
+    wal.getting_synced = false;
+  }
   log_sync_cv_.SignalAll();
 }
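MarkLogsSynced now records every fully synced, retired WAL in a VersionEdit when track_and_verify_wals_in_manifest is set, so recovery can compare WAL sizes on disk against what the MANIFEST promised. A schematic of the bookkeeping (toy WalMeta/WalTracker types standing in for WalMetadata and the MANIFEST machinery):

  #include <cstdint>
  #include <map>

  // Toy stand-ins for WalMetadata and the MANIFEST's view of synced WALs.
  struct WalMeta {
    uint64_t synced_size_bytes;
  };

  class WalTracker {
   public:
    // Called once a WAL has been fsynced and is about to be retired,
    // as MarkLogsSynced does for every log with number <= up_to.
    void MarkSynced(uint64_t wal_number, uint64_t size) {
      synced_wals_[wal_number] = WalMeta{size};
    }
    // On recovery, a WAL smaller than its recorded size indicates loss.
    bool Verify(uint64_t wal_number, uint64_t size_on_disk) const {
      auto it = synced_wals_.find(wal_number);
      return it == synced_wals_.end() ||
             size_on_disk >= it->second.synced_size_bytes;
    }

   private:
    std::map<uint64_t, WalMeta> synced_wals_;
  };

  int main() {
    WalTracker tracker;
    tracker.MarkSynced(/*wal_number=*/12, /*size=*/4096);
    return tracker.Verify(12, 4096) ? 0 : 1;
  }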
@@ -1298,23 +1505,49 @@
   }
 }
 
-InternalIterator* DBImpl::NewInternalIterator(
-    Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence,
-    ColumnFamilyHandle* column_family) {
+Status DBImpl::GetFullHistoryTsLow(ColumnFamilyHandle* column_family,
+                                   std::string* ts_low) {
+  if (ts_low == nullptr) {
+    return Status::InvalidArgument("ts_low is nullptr");
+  }
+  ColumnFamilyData* cfd = nullptr;
+  if (column_family == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  } else {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    assert(cfh != nullptr);
+    cfd = cfh->cfd();
+  }
+  assert(cfd != nullptr && cfd->user_comparator() != nullptr);
+  if (cfd->user_comparator()->timestamp_size() == 0) {
+    return Status::InvalidArgument(
+        "Timestamp is not enabled in this column family");
+  }
+  InstrumentedMutexLock l(&mutex_);
+  *ts_low = cfd->GetFullHistoryTsLow();
+  assert(cfd->user_comparator()->timestamp_size() == ts_low->size());
+  return Status::OK();
+}
+
+InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
+                                              Arena* arena,
+                                              RangeDelAggregator* range_del_agg,
+                                              SequenceNumber sequence,
+                                              ColumnFamilyHandle* column_family,
+                                              bool allow_unprepared_value) {
   ColumnFamilyData* cfd;
   if (column_family == nullptr) {
     cfd = default_cf_handle_->cfd();
   } else {
-    auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
     cfd = cfh->cfd();
   }
 
   mutex_.Lock();
   SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
   mutex_.Unlock();
-  ReadOptions roptions;
-  return NewInternalIterator(roptions, cfd, super_version, arena, range_del_agg,
-                             sequence);
+  return NewInternalIterator(read_options, cfd, super_version, arena,
+                             range_del_agg, sequence, allow_unprepared_value);
 }
 
 void DBImpl::SchedulePurge() {
@@ -1346,6 +1579,8 @@
     mutex_.Lock();
   }
 
+  assert(bg_purge_scheduled_ > 0);
+
   // Can't use iterator to go over purge_files_ because inside the loop we're
   // unlocking the mutex that protects purge_files_.
   while (!purge_files_.empty()) {
@@ -1413,17 +1648,7 @@
     delete state->super_version;
   }
   if (job_context.HaveSomethingToDelete()) {
-    if (state->background_purge) {
-      // PurgeObsoleteFiles here does not delete files. Instead, it adds the
-      // files to be deleted to a job queue, and deletes it in a separate
-      // background thread.
-      state->db->PurgeObsoleteFiles(job_context, true /* schedule only */);
-      state->mu->Lock();
-      state->db->SchedulePurge();
-      state->mu->Unlock();
-    } else {
-      state->db->PurgeObsoleteFiles(job_context);
-    }
+    state->db->PurgeObsoleteFiles(job_context, state->background_purge);
   }
   job_context.Clean();
 }
@@ -1437,7 +1662,8 @@
                                               SuperVersion* super_version,
                                               Arena* arena,
                                               RangeDelAggregator* range_del_agg,
-                                              SequenceNumber sequence) {
+                                              SequenceNumber sequence,
+                                              bool allow_unprepared_value) {
   InternalIterator* internal_iter;
   assert(arena != nullptr);
   assert(range_del_agg != nullptr);
@@ -1469,7 +1695,8 @@
     // Collect iterators for files in L0 - Ln
     if (read_options.read_tier != kMemtableTier) {
       super_version->current->AddIterators(read_options, file_options_,
-                                           &merge_iter_builder, range_del_agg);
+                                           &merge_iter_builder, range_del_agg,
+                                           allow_unprepared_value);
     }
     internal_iter = merge_iter_builder.Finish();
     IterState* cleanup =
@@ -1496,22 +1723,57 @@
 Status DBImpl::Get(const ReadOptions& read_options,
                    ColumnFamilyHandle* column_family, const Slice& key,
                    PinnableSlice* value) {
+  return Get(read_options, column_family, key, value, /*timestamp=*/nullptr);
+}
+
+Status DBImpl::Get(const ReadOptions& read_options,
+                   ColumnFamilyHandle* column_family, const Slice& key,
+                   PinnableSlice* value, std::string* timestamp) {
   GetImplOptions get_impl_options;
   get_impl_options.column_family = column_family;
   get_impl_options.value = value;
-  return GetImpl(read_options, key, get_impl_options);
+  get_impl_options.timestamp = timestamp;
+  Status s = GetImpl(read_options, key, get_impl_options);
+  return s;
 }
 
+namespace {
+class GetWithTimestampReadCallback : public ReadCallback {
+ public:
+  explicit GetWithTimestampReadCallback(SequenceNumber seq)
+      : ReadCallback(seq) {}
+  bool IsVisibleFullCheck(SequenceNumber seq) override {
+    return seq <= max_visible_seq_;
+  }
+};
+}  // namespace
+
 Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
-                       GetImplOptions get_impl_options) {
+                       GetImplOptions& get_impl_options) {
   assert(get_impl_options.value != nullptr ||
          get_impl_options.merge_operands != nullptr);
-  PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
-  StopWatch sw(env_, stats_, DB_GET);
+
+  assert(get_impl_options.column_family);
+  const Comparator* ucmp = get_impl_options.column_family->GetComparator();
+  assert(ucmp);
+  size_t ts_sz = ucmp->timestamp_size();
+  GetWithTimestampReadCallback read_cb(0);  // Will call Refresh
+
+#ifndef NDEBUG
+  if (ts_sz > 0) {
+    assert(read_options.timestamp);
+    assert(read_options.timestamp->size() == ts_sz);
+  } else {
+    assert(!read_options.timestamp);
+  }
+#endif  // NDEBUG
+
+  PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
+  StopWatch sw(immutable_db_options_.clock, stats_, DB_GET);
   PERF_TIMER_GUARD(get_snapshot_time);
 
-  auto cfh =
-      reinterpret_cast<ColumnFamilyHandleImpl*>(get_impl_options.column_family);
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(
+      get_impl_options.column_family);
   auto cfd = cfh->cfd();
 
   if (tracer_) {
@@ -1519,7 +1781,8 @@
     // tracing is enabled.
     InstrumentedMutexLock lock(&trace_mutex_);
     if (tracer_) {
-      tracer_->Get(get_impl_options.column_family, key);
+      // TODO: maybe handle the tracing status?
+      tracer_->Get(get_impl_options.column_family, key).PermitUncheckedError();
     }
   }
 
@@ -1544,9 +1807,11 @@
   // data for the snapshot, so the reader would see neither data that was be
   // visible to the snapshot before compaction nor the newer data inserted
   // afterwards.
-  snapshot = last_seq_same_as_publish_seq_
-                 ? versions_->LastSequence()
-                 : versions_->LastPublishedSequence();
+  if (last_seq_same_as_publish_seq_) {
+    snapshot = versions_->LastSequence();
+  } else {
+    snapshot = versions_->LastPublishedSequence();
+  }
   if (get_impl_options.callback) {
     // The unprep_seqs are not published for write unprepared, so it could be
     // that max_visible_seq is larger. Seek to the std::max of the two.
@@ -1566,6 +1831,16 @@
       snapshot = get_impl_options.callback->max_visible_seq();
     }
   }
+  // If timestamp is used, we use read callback to ensure <key, t, s> is
+  // returned only if t <= read_opts.timestamp and s <= snapshot.
+  // HACK: temporarily overwrite input struct field but restore
+  SaveAndRestore<ReadCallback*> restore_callback(&get_impl_options.callback);
+  if (ts_sz > 0) {
+    assert(!get_impl_options
+                .callback);  // timestamp with callback is not supported
+    read_cb.Refresh(snapshot);
+    get_impl_options.callback = &read_cb;
+  }
   TEST_SYNC_POINT("DBImpl::GetImpl:3");
   TEST_SYNC_POINT("DBImpl::GetImpl:4");
 
@@ -1583,10 +1858,11 @@
   bool skip_memtable = (read_options.read_tier == kPersistedTier &&
                         has_unpersisted_data_.load(std::memory_order_relaxed));
   bool done = false;
+  std::string* timestamp = ts_sz > 0 ? get_impl_options.timestamp : nullptr;
   if (!skip_memtable) {
     // Get value associated with key
     if (get_impl_options.get_value) {
-      if (sv->mem->Get(lkey, get_impl_options.value->GetSelf(), &s,
+      if (sv->mem->Get(lkey, get_impl_options.value->GetSelf(), timestamp, &s,
                        &merge_context, &max_covering_tombstone_seq,
                        read_options, get_impl_options.callback,
                        get_impl_options.is_blob_index)) {
@@ -1594,9 +1870,10 @@
         get_impl_options.value->PinSelf();
         RecordTick(stats_, MEMTABLE_HIT);
       } else if ((s.ok() || s.IsMergeInProgress()) &&
-                 sv->imm->Get(lkey, get_impl_options.value->GetSelf(), &s,
-                              &merge_context, &max_covering_tombstone_seq,
-                              read_options, get_impl_options.callback,
+                 sv->imm->Get(lkey, get_impl_options.value->GetSelf(),
+                              timestamp, &s, &merge_context,
+                              &max_covering_tombstone_seq, read_options,
+                              get_impl_options.callback,
                               get_impl_options.is_blob_index)) {
         done = true;
         get_impl_options.value->PinSelf();
@@ -1605,9 +1882,9 @@
     } else {
       // Get Merge Operands associated with key, Merge Operands should not be
       // merged and raw values should be returned to the user.
-      if (sv->mem->Get(lkey, nullptr, &s, &merge_context,
-                       &max_covering_tombstone_seq, read_options, nullptr,
-                       nullptr, false)) {
+      if (sv->mem->Get(lkey, /*value*/ nullptr, /*timestamp=*/nullptr, &s,
+                       &merge_context, &max_covering_tombstone_seq,
+                       read_options, nullptr, nullptr, false)) {
         done = true;
         RecordTick(stats_, MEMTABLE_HIT);
       } else if ((s.ok() || s.IsMergeInProgress()) &&
@@ -1623,11 +1900,12 @@
       return s;
     }
   }
+  PinnedIteratorsManager pinned_iters_mgr;
   if (!done) {
     PERF_TIMER_GUARD(get_from_output_files_time);
     sv->current->Get(
-        read_options, lkey, get_impl_options.value, &s, &merge_context,
-        &max_covering_tombstone_seq,
+        read_options, lkey, get_impl_options.value, timestamp, &s,
+        &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr,
         get_impl_options.get_value ? get_impl_options.value_found : nullptr,
         nullptr, nullptr,
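GetWithTimestampReadCallback above is a thin ReadCallback: the read path consults it for every candidate key version, and the callback hides anything written after the chosen snapshot so the `s <= snapshot` bound holds (the `t <= read_opts.timestamp` bound comes from the timestamped LookupKey). A standalone sketch of the visibility test (hypothetical SequenceNumber alias and class name, not the RocksDB hierarchy):

  #include <cassert>
  #include <cstdint>

  using SequenceNumber = uint64_t;

  // Minimal visibility filter in the spirit of ReadCallback: a key version
  // written at sequence `seq` is visible iff seq <= the refreshed snapshot.
  class SnapshotVisibility {
   public:
    explicit SnapshotVisibility(SequenceNumber snapshot) : snapshot_(snapshot) {}
    void Refresh(SequenceNumber snapshot) { snapshot_ = snapshot; }
    bool IsVisible(SequenceNumber seq) const { return seq <= snapshot_; }

   private:
    SequenceNumber snapshot_;
  };

  int main() {
    SnapshotVisibility cb(0);
    cb.Refresh(/*snapshot=*/100);  // as GetImpl does once the snapshot is known
    assert(cb.IsVisible(99));      // older version: visible
    assert(!cb.IsVisible(101));    // written after the snapshot: hidden
    return 0;
  }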
        get_impl_options.get_value ? get_impl_options.callback : nullptr,
@@ -1675,17 +1953,49 @@
     const ReadOptions& read_options,
     const std::vector<ColumnFamilyHandle*>& column_family,
     const std::vector<Slice>& keys, std::vector<std::string>* values) {
-  PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
-  StopWatch sw(env_, stats_, DB_MULTIGET);
+  return MultiGet(read_options, column_family, keys, values,
+                  /*timestamps=*/nullptr);
+}
+
+std::vector<Status> DBImpl::MultiGet(
+    const ReadOptions& read_options,
+    const std::vector<ColumnFamilyHandle*>& column_family,
+    const std::vector<Slice>& keys, std::vector<std::string>* values,
+    std::vector<std::string>* timestamps) {
+  PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
+  StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET);
   PERF_TIMER_GUARD(get_snapshot_time);
 
+#ifndef NDEBUG
+  for (const auto* cfh : column_family) {
+    assert(cfh);
+    const Comparator* const ucmp = cfh->GetComparator();
+    assert(ucmp);
+    if (ucmp->timestamp_size() > 0) {
+      assert(read_options.timestamp);
+      assert(ucmp->timestamp_size() == read_options.timestamp->size());
+    } else {
+      assert(!read_options.timestamp);
+    }
+  }
+#endif  // NDEBUG
+
+  if (tracer_) {
+    // TODO: This mutex should be removed later, to improve performance when
+    // tracing is enabled.
+    InstrumentedMutexLock lock(&trace_mutex_);
+    if (tracer_) {
+      // TODO: maybe handle the tracing status?
+      tracer_->MultiGet(column_family, keys).PermitUncheckedError();
+    }
+  }
+
   SequenceNumber consistent_seqnum;
-  ;
 
   std::unordered_map<uint32_t, MultiGetColumnFamilyData> multiget_cf_data(
       column_family.size());
   for (auto cf : column_family) {
-    auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(cf);
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(cf);
    auto cfd = cfh->cfd();
     if (multiget_cf_data.find(cfd->GetID()) == multiget_cf_data.end()) {
       multiget_cf_data.emplace(cfd->GetID(),
@@ -1704,6 +2014,9 @@
       read_options, nullptr, iter_deref_lambda, &multiget_cf_data,
       &consistent_seqnum);
 
+  TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum1");
+  TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum2");
+
   // Contain a list of merge operations if merge occurs.
   MergeContext merge_context;
 
@@ -1711,6 +2024,9 @@
   size_t num_keys = keys.size();
   std::vector<Status> stat_list(num_keys);
   values->resize(num_keys);
+  if (timestamps) {
+    timestamps->resize(num_keys);
+  }
 
   // Keep track of bytes that we read for statistics-recording later
   uint64_t bytes_read = 0;
@@ -1721,13 +2037,25 @@
   // s is both in/out. When in, s could either be OK or MergeInProgress.
   // merge_operands will contain the sequence of merges in the latter case.
   size_t num_found = 0;
-  for (size_t i = 0; i < num_keys; ++i) {
-    merge_context.Clear();
-    Status& s = stat_list[i];
-    std::string* value = &(*values)[i];
+  size_t keys_read;
+  uint64_t curr_value_size = 0;
 
-    LookupKey lkey(keys[i], consistent_seqnum);
-    auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family[i]);
+  GetWithTimestampReadCallback timestamp_read_callback(0);
+  ReadCallback* read_callback = nullptr;
+  if (read_options.timestamp && read_options.timestamp->size() > 0) {
+    timestamp_read_callback.Refresh(consistent_seqnum);
+    read_callback = &timestamp_read_callback;
+  }
+
+  for (keys_read = 0; keys_read < num_keys; ++keys_read) {
+    merge_context.Clear();
+    Status& s = stat_list[keys_read];
+    std::string* value = &(*values)[keys_read];
+    std::string* timestamp = timestamps ? &(*timestamps)[keys_read] : nullptr;
+
+    LookupKey lkey(keys[keys_read], consistent_seqnum, read_options.timestamp);
+    auto cfh =
+        static_cast_with_check<ColumnFamilyHandleImpl>(column_family[keys_read]);
     SequenceNumber max_covering_tombstone_seq = 0;
     auto mgd_iter = multiget_cf_data.find(cfh->cfd()->GetID());
     assert(mgd_iter != multiget_cf_data.end());
@@ -1738,13 +2066,15 @@
                           has_unpersisted_data_.load(std::memory_order_relaxed));
     bool done = false;
     if (!skip_memtable) {
-      if (super_version->mem->Get(lkey, value, &s, &merge_context,
-                                  &max_covering_tombstone_seq, read_options)) {
+      if (super_version->mem->Get(lkey, value, timestamp, &s, &merge_context,
+                                  &max_covering_tombstone_seq, read_options,
+                                  read_callback)) {
         done = true;
         RecordTick(stats_, MEMTABLE_HIT);
-      } else if (super_version->imm->Get(lkey, value, &s, &merge_context,
+      } else if (super_version->imm->Get(lkey, value, timestamp, &s,
+                                         &merge_context,
                                          &max_covering_tombstone_seq,
-                                         read_options)) {
+                                         read_options, read_callback)) {
         done = true;
         RecordTick(stats_, MEMTABLE_HIT);
       }
@@ -1752,8 +2082,13 @@
     if (!done) {
       PinnableSlice pinnable_val;
       PERF_TIMER_GUARD(get_from_output_files_time);
-      super_version->current->Get(read_options, lkey, &pinnable_val, &s,
-                                  &merge_context, &max_covering_tombstone_seq);
+      PinnedIteratorsManager pinned_iters_mgr;
+      super_version->current->Get(read_options, lkey, &pinnable_val, timestamp,
+                                  &s, &merge_context,
+                                  &max_covering_tombstone_seq,
+                                  &pinned_iters_mgr, /*value_found=*/nullptr,
+                                  /*key_exists=*/nullptr,
+                                  /*seq=*/nullptr, read_callback);
       value->assign(pinnable_val.data(), pinnable_val.size());
       RecordTick(stats_, MEMTABLE_MISS);
     }
@@ -1761,6 +2096,28 @@
     if (s.ok()) {
       bytes_read += value->size();
       num_found++;
+      curr_value_size += value->size();
+      if (curr_value_size > read_options.value_size_soft_limit) {
+        while (++keys_read < num_keys) {
+          stat_list[keys_read] = Status::Aborted();
+        }
+        break;
+      }
+    }
+    if (read_options.deadline.count() &&
+        immutable_db_options_.clock->NowMicros() >
+            static_cast<uint64_t>(read_options.deadline.count())) {
+      break;
+    }
+  }
+
+  if (keys_read < num_keys) {
+    // The only reason to break out of the loop is when the deadline is
+    // exceeded
+    assert(immutable_db_options_.clock->NowMicros() >
+           static_cast<uint64_t>(read_options.deadline.count()));
+    for (++keys_read; keys_read < num_keys; ++keys_read) {
+      stat_list[keys_read] = Status::TimedOut();
+    }
+  }
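The new MultiGet loop above can stop early for two reasons: the accumulated value bytes exceed read_options.value_size_soft_limit (remaining keys get Status::Aborted) or the wall clock passes read_options.deadline (remaining keys get Status::TimedOut). A compact sketch of the same budgeting scheme over a generic batch (std::chrono based; function and enum names hypothetical):

  #include <chrono>
  #include <cstdint>
  #include <string>
  #include <vector>

  enum class KeyStatus { kOk, kAborted, kTimedOut };

  // Processes keys until either the size budget or the deadline is hit,
  // then labels the remaining keys, mirroring the MultiGet loop above.
  std::vector<KeyStatus> BatchedRead(
      const std::vector<std::string>& keys, uint64_t value_size_soft_limit,
      std::chrono::steady_clock::time_point deadline,
      std::string (*read_one)(const std::string&)) {
    std::vector<KeyStatus> statuses(keys.size(), KeyStatus::kOk);
    uint64_t bytes = 0;
    size_t i = 0;
    for (; i < keys.size(); ++i) {
      bytes += read_one(keys[i]).size();
      if (bytes > value_size_soft_limit) {  // size budget exhausted:
        while (++i < keys.size()) {         // abort the rest
          statuses[i] = KeyStatus::kAborted;
        }
        break;
      }
      if (std::chrono::steady_clock::now() > deadline) {  // out of time:
        while (++i < keys.size()) {         // time out the rest
          statuses[i] = KeyStatus::kTimedOut;
        }
        break;
      }
    }
    return statuses;
  }

  static std::string FakeRead(const std::string&) { return "v"; }

  int main() {
    auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(1);
    auto st = BatchedRead({"a", "b", "c"}, 1024, deadline, &FakeRead);
    return st[0] == KeyStatus::kOk ? 0 : 1;
  }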
@@ -1827,16 +2184,18 @@
       // version because a flush happening in between may compact away data for
       // the snapshot, but the snapshot is earlier than the data overwriting it,
       // so users may see wrong results.
-      *snapshot = last_seq_same_as_publish_seq_
-                      ? versions_->LastSequence()
-                      : versions_->LastPublishedSequence();
+      if (last_seq_same_as_publish_seq_) {
+        *snapshot = versions_->LastSequence();
+      } else {
+        *snapshot = versions_->LastPublishedSequence();
+      }
     }
   } else {
     // If we end up with the same issue of memtable getting sealed during 2
     // consecutive retries, it means the write rate is very high. In that case
     // its probably ok to take the mutex on the 3rd try so we can succeed for
     // sure
-    static const int num_retries = 3;
+    constexpr int num_retries = 3;
     for (int i = 0; i < num_retries; ++i) {
       last_try = (i == num_retries - 1);
       bool retry = false;
@@ -1860,12 +2219,15 @@
         // acquire the lock so we're sure to succeed
         mutex_.Lock();
       }
-      *snapshot = last_seq_same_as_publish_seq_
-                      ? versions_->LastSequence()
-                      : versions_->LastPublishedSequence();
+      if (last_seq_same_as_publish_seq_) {
+        *snapshot = versions_->LastSequence();
+      } else {
+        *snapshot = versions_->LastPublishedSequence();
+      }
     } else {
-      *snapshot = reinterpret_cast<const SnapshotImpl*>(read_options.snapshot)
-                      ->number_;
+      *snapshot =
+          static_cast_with_check<const SnapshotImpl>(read_options.snapshot)
+              ->number_;
     }
     for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end();
          ++cf_iter) {
@@ -1915,14 +2277,49 @@
                      ColumnFamilyHandle** column_families, const Slice* keys,
                      PinnableSlice* values, Status* statuses,
                      const bool sorted_input) {
+  return MultiGet(read_options, num_keys, column_families, keys, values,
+                  /*timestamps=*/nullptr, statuses, sorted_input);
+}
+
+void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
+                      ColumnFamilyHandle** column_families, const Slice* keys,
+                      PinnableSlice* values, std::string* timestamps,
+                      Status* statuses, const bool sorted_input) {
   if (num_keys == 0) {
     return;
   }
+
+#ifndef NDEBUG
+  for (size_t i = 0; i < num_keys; ++i) {
+    ColumnFamilyHandle* cfh = column_families[i];
+    assert(cfh);
+    const Comparator* const ucmp = cfh->GetComparator();
+    assert(ucmp);
+    if (ucmp->timestamp_size() > 0) {
+      assert(read_options.timestamp);
+      assert(read_options.timestamp->size() == ucmp->timestamp_size());
+    } else {
+      assert(!read_options.timestamp);
+    }
+  }
+#endif  // NDEBUG
+
+  if (tracer_) {
+    // TODO: This mutex should be removed later, to improve performance when
+    // tracing is enabled.
+    InstrumentedMutexLock lock(&trace_mutex_);
+    if (tracer_) {
+      // TODO: maybe handle the tracing status?
+      tracer_->MultiGet(num_keys, column_families, keys).PermitUncheckedError();
+    }
+  }
+
   autovector<KeyContext, MultiGetContext::MAX_BATCH_SIZE> key_context;
   autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
   sorted_keys.resize(num_keys);
   for (size_t i = 0; i < num_keys; ++i) {
     key_context.emplace_back(column_families[i], keys[i], &values[i],
×tamps[i] : nullptr, &statuses[i]); } for (size_t i = 0; i < num_keys; ++i) { @@ -1934,20 +2331,18 @@ multiget_cf_data; size_t cf_start = 0; ColumnFamilyHandle* cf = sorted_keys[0]->column_family; + for (size_t i = 0; i < num_keys; ++i) { KeyContext* key_ctx = sorted_keys[i]; if (key_ctx->column_family != cf) { - multiget_cf_data.emplace_back( - MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr)); + multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr); cf_start = i; cf = key_ctx->column_family; } } - { - // multiget_cf_data.emplace_back( - // MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr)); - multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr); - } + + multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr); + std::function::iterator&)> @@ -1963,14 +2358,38 @@ read_options, nullptr, iter_deref_lambda, &multiget_cf_data, &consistent_seqnum); - for (auto cf_iter = multiget_cf_data.begin(); - cf_iter != multiget_cf_data.end(); ++cf_iter) { - MultiGetImpl(read_options, cf_iter->start, cf_iter->num_keys, &sorted_keys, - cf_iter->super_version, consistent_seqnum, nullptr, nullptr); + GetWithTimestampReadCallback timestamp_read_callback(0); + ReadCallback* read_callback = nullptr; + if (read_options.timestamp && read_options.timestamp->size() > 0) { + timestamp_read_callback.Refresh(consistent_seqnum); + read_callback = ×tamp_read_callback; + } + + Status s; + auto cf_iter = multiget_cf_data.begin(); + for (; cf_iter != multiget_cf_data.end(); ++cf_iter) { + s = MultiGetImpl(read_options, cf_iter->start, cf_iter->num_keys, + &sorted_keys, cf_iter->super_version, consistent_seqnum, + read_callback); + if (!s.ok()) { + break; + } + } + if (!s.ok()) { + assert(s.IsTimedOut() || s.IsAborted()); + for (++cf_iter; cf_iter != multiget_cf_data.end(); ++cf_iter) { + for (size_t i = cf_iter->start; i < cf_iter->start + cf_iter->num_keys; + ++i) { + *sorted_keys[i]->s = s; + } + } + } + + for (const auto& iter : multiget_cf_data) { if (!unref_only) { - ReturnAndCleanupSuperVersion(cf_iter->cfd, cf_iter->super_version); + ReturnAndCleanupSuperVersion(iter.cfd, iter.super_version); } else { - cf_iter->cfd->GetSuperVersion()->Unref(); + iter.cfd->GetSuperVersion()->Unref(); } } } @@ -1983,7 +2402,7 @@ static_cast(lhs->column_family); uint32_t cfd_id1 = cfh->cfd()->GetID(); const Comparator* comparator = cfh->cfd()->user_comparator(); - cfh = static_cast(lhs->column_family); + cfh = static_cast(rhs->column_family); uint32_t cfd_id2 = cfh->cfd()->GetID(); if (cfd_id1 < cfd_id2) { @@ -1993,7 +2412,8 @@ } // Both keys are from the same column family - int cmp = comparator->Compare(*(lhs->key), *(rhs->key)); + int cmp = comparator->CompareWithoutTimestamp( + *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false); if (cmp < 0) { return true; } @@ -2006,48 +2426,47 @@ void DBImpl::PrepareMultiGetKeys( size_t num_keys, bool sorted_input, autovector* sorted_keys) { -#ifndef NDEBUG if (sorted_input) { - for (size_t index = 0; index < sorted_keys->size(); ++index) { - if (index > 0) { - KeyContext* lhs = (*sorted_keys)[index - 1]; - KeyContext* rhs = (*sorted_keys)[index]; - ColumnFamilyHandleImpl* cfh = - reinterpret_cast(lhs->column_family); - uint32_t cfd_id1 = cfh->cfd()->GetID(); - const Comparator* comparator = cfh->cfd()->user_comparator(); - cfh = reinterpret_cast(lhs->column_family); - uint32_t cfd_id2 = cfh->cfd()->GetID(); - - assert(cfd_id1 <= cfd_id2); - if (cfd_id1 < cfd_id2) { - continue; - } - - // Both keys 
are from the same column family - int cmp = comparator->Compare(*(lhs->key), *(rhs->key)); - assert(cmp <= 0); - } - index++; - } - } +#ifndef NDEBUG + assert(std::is_sorted(sorted_keys->begin(), sorted_keys->end(), + CompareKeyContext())); #endif - if (!sorted_input) { - CompareKeyContext sort_comparator; - std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys, - sort_comparator); + return; } + + std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys, + CompareKeyContext()); } void DBImpl::MultiGet(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input) { + return MultiGet(read_options, column_family, num_keys, keys, values, + /*timestamp=*/nullptr, statuses, sorted_input); +} + +void DBImpl::MultiGet(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const size_t num_keys, + const Slice* keys, PinnableSlice* values, + std::string* timestamps, Status* statuses, + const bool sorted_input) { + if (tracer_) { + // TODO: This mutex should be removed later, to improve performance when + // tracing is enabled. + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_) { + // TODO: maybe handle the tracing status? + tracer_->MultiGet(num_keys, column_family, keys).PermitUncheckedError(); + } + } autovector key_context; autovector sorted_keys; sorted_keys.resize(num_keys); for (size_t i = 0; i < num_keys; ++i) { - key_context.emplace_back(column_family, keys[i], &values[i], &statuses[i]); + key_context.emplace_back(column_family, keys[i], &values[i], + timestamps ? ×tamps[i] : nullptr, + &statuses[i]); } for (size_t i = 0; i < num_keys; ++i) { sorted_keys[i] = &key_context[i]; @@ -2100,33 +2519,61 @@ consistent_seqnum = callback->max_visible_seq(); } - MultiGetImpl(read_options, 0, num_keys, sorted_keys, - multiget_cf_data[0].super_version, consistent_seqnum, nullptr, - nullptr); + GetWithTimestampReadCallback timestamp_read_callback(0); + ReadCallback* read_callback = callback; + if (read_options.timestamp && read_options.timestamp->size() > 0) { + assert(!read_callback); // timestamp with callback is not supported + timestamp_read_callback.Refresh(consistent_seqnum); + read_callback = ×tamp_read_callback; + } + + Status s = MultiGetImpl(read_options, 0, num_keys, sorted_keys, + multiget_cf_data[0].super_version, consistent_seqnum, + read_callback); + assert(s.ok() || s.IsTimedOut() || s.IsAborted()); ReturnAndCleanupSuperVersion(multiget_cf_data[0].cfd, multiget_cf_data[0].super_version); } -void DBImpl::MultiGetImpl( +// The actual implementation of batched MultiGet. Parameters - +// start_key - Index in the sorted_keys vector to start processing from +// num_keys - Number of keys to lookup, starting with sorted_keys[start_key] +// sorted_keys - The entire batch of sorted keys for this CF +// +// The per key status is returned in the KeyContext structures pointed to by +// sorted_keys. An overall Status is also returned, with the only possible +// values being Status::OK() and Status::TimedOut(). 
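For orientation, a hedged sketch of the batched entry point exercised here (illustrative caller code, not from this patch): with sorted_input=false, PrepareMultiGetKeys orders the keys with CompareKeyContext, whose lhs/rhs mix-up is also fixed a few hunks above.

    #include "rocksdb/db.h"

    // Minimal sketch, assuming `db` is open; keys need not be pre-sorted.
    void BatchedMultiGet(rocksdb::DB* db) {
      constexpr size_t kNum = 3;
      rocksdb::Slice keys[kNum] = {"b", "a", "c"};  // deliberately unsorted
      rocksdb::PinnableSlice values[kNum];
      rocksdb::Status statuses[kNum];
      db->MultiGet(rocksdb::ReadOptions(), db->DefaultColumnFamily(), kNum, keys,
                   values, statuses, /*sorted_input=*/false);
      // With sorted_input=true the caller asserts the keys are already ordered
      // by column family ID, then by user key, and the sort is skipped.
    }
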
The latter indicates +// that the call exceeded read_options.deadline +Status DBImpl::MultiGetImpl( const ReadOptions& read_options, size_t start_key, size_t num_keys, autovector* sorted_keys, SuperVersion* super_version, SequenceNumber snapshot, - ReadCallback* callback, bool* is_blob_index) { - PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_); - StopWatch sw(env_, stats_, DB_MULTIGET); + ReadCallback* callback) { + PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); + StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET); // For each of the given keys, apply the entire "get" process as follows: // First look in the memtable, then in the immutable memtable (if any). // s is both in/out. When in, s could either be OK or MergeInProgress. // merge_operands will contain the sequence of merges in the latter case. size_t keys_left = num_keys; + Status s; + uint64_t curr_value_size = 0; while (keys_left) { + if (read_options.deadline.count() && + immutable_db_options_.clock->NowMicros() > + static_cast(read_options.deadline.count())) { + s = Status::TimedOut(); + break; + } + size_t batch_size = (keys_left > MultiGetContext::MAX_BATCH_SIZE) ? MultiGetContext::MAX_BATCH_SIZE : keys_left; MultiGetContext ctx(sorted_keys, start_key + num_keys - keys_left, - batch_size, snapshot); + batch_size, snapshot, read_options); MultiGetRange range = ctx.GetMultiGetRange(); + range.AddValueSize(curr_value_size); bool lookup_current = false; keys_left -= batch_size; @@ -2140,11 +2587,9 @@ (read_options.read_tier == kPersistedTier && has_unpersisted_data_.load(std::memory_order_relaxed)); if (!skip_memtable) { - super_version->mem->MultiGet(read_options, &range, callback, - is_blob_index); + super_version->mem->MultiGet(read_options, &range, callback); if (!range.empty()) { - super_version->imm->MultiGet(read_options, &range, callback, - is_blob_index); + super_version->imm->MultiGet(read_options, &range, callback); } if (!range.empty()) { lookup_current = true; @@ -2154,8 +2599,12 @@ } if (lookup_current) { PERF_TIMER_GUARD(get_from_output_files_time); - super_version->current->MultiGet(read_options, &range, callback, - is_blob_index); + super_version->current->MultiGet(read_options, &range, callback); + } + curr_value_size = range.GetValueSize(); + if (curr_value_size > read_options.value_size_soft_limit) { + s = Status::Aborted(); + break; } } @@ -2163,13 +2612,21 @@ PERF_TIMER_GUARD(get_post_process_time); size_t num_found = 0; uint64_t bytes_read = 0; - for (size_t i = start_key; i < start_key + num_keys; ++i) { + for (size_t i = start_key; i < start_key + num_keys - keys_left; ++i) { KeyContext* key = (*sorted_keys)[i]; if (key->s->ok()) { bytes_read += key->value->size(); num_found++; } } + if (keys_left) { + assert(s.IsTimedOut() || s.IsAborted()); + for (size_t i = start_key + num_keys - keys_left; i < start_key + num_keys; + ++i) { + KeyContext* key = (*sorted_keys)[i]; + *key->s = s; + } + } RecordTick(stats_, NUMBER_MULTIGET_CALLS); RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys); @@ -2178,6 +2635,8 @@ RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read); PERF_COUNTER_ADD(multiget_read_bytes, bytes_read); PERF_TIMER_STOP(get_post_process_time); + + return s; } Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options, @@ -2252,7 +2711,6 @@ const std::string& column_family_name, ColumnFamilyHandle** handle) { Status s; - Status persist_options_status; *handle = nullptr; DBOptions db_options = @@ -2301,7 +2759,7 @@ auto* cfd = 
versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); assert(cfd != nullptr); - std::map> dummy_created_dirs; + std::map> dummy_created_dirs; s = cfd->AddDirectories(&dummy_created_dirs); } if (s.ok()) { @@ -2333,7 +2791,7 @@ // this is outside the mutex if (s.ok()) { NewThreadStatusCfInfo( - reinterpret_cast(*handle)->cfd()); + static_cast_with_check(*handle)->cfd()); } return s; } @@ -2370,7 +2828,7 @@ } Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) { - auto cfh = reinterpret_cast(column_family); + auto cfh = static_cast_with_check(column_family); auto cfd = cfh->cfd(); if (cfd->GetID() == 0) { return Status::InvalidArgument("Can't drop default column family"); @@ -2436,7 +2894,8 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, - std::string* value, bool* value_found) { + std::string* value, std::string* timestamp, + bool* value_found) { assert(value != nullptr); if (value_found != nullptr) { // falsify later if key-may-exist but can't fetch value @@ -2449,6 +2908,7 @@ get_impl_options.column_family = column_family; get_impl_options.value = &pinnable_val; get_impl_options.value_found = value_found; + get_impl_options.timestamp = timestamp; auto s = GetImpl(roptions, key, get_impl_options); value->assign(pinnable_val.data(), pinnable_val.size()); @@ -2471,6 +2931,13 @@ } // if iterator wants internal keys, we can only proceed if // we can guarantee the deletes haven't been processed yet + if (read_options.iter_start_seqnum > 0 && + !iter_start_seqnum_deprecation_warned_.exchange(true)) { + ROCKS_LOG_WARN( + immutable_db_options_.info_log, + "iter_start_seqnum is deprecated, will be removed in a future release. " + "Please try using user-defined timestamp instead."); + } if (immutable_db_options_.preserve_deletes && read_options.iter_start_seqnum > 0 && read_options.iter_start_seqnum < preserve_deletes_seqnum_.load()) { @@ -2478,8 +2945,9 @@ "Iterator requested internal keys which are too old and are not" " guaranteed to be preserved, try larger iter_start_seqnum opt.")); } - auto cfh = reinterpret_cast(column_family); - auto cfd = cfh->cfd(); + auto cfh = static_cast_with_check(column_family); + ColumnFamilyData* cfd = cfh->cfd(); + assert(cfd != nullptr); ReadCallback* read_callback = nullptr; // No read callback provided. if (read_options.tailing) { #ifdef ROCKSDB_LITE @@ -2488,10 +2956,11 @@ #else SuperVersion* sv = cfd->GetReferencedSuperVersion(this); - auto iter = new ForwardIterator(this, read_options, cfd, sv); + auto iter = new ForwardIterator(this, read_options, cfd, sv, + /* allow_unprepared_value */ true); result = NewDBIterator( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, - cfd->user_comparator(), iter, kMaxSequenceNumber, + cfd->user_comparator(), iter, sv->current, kMaxSequenceNumber, sv->mutable_cf_options.max_sequential_skip_in_iterations, read_callback, this, cfd); #endif @@ -2499,10 +2968,11 @@ // Note: no need to consider the special case of // last_seq_same_as_publish_seq_==false since NewIterator is overridden in // WritePreparedTxnDB - auto snapshot = read_options.snapshot != nullptr - ? read_options.snapshot->GetSequenceNumber() - : versions_->LastSequence(); - result = NewIteratorImpl(read_options, cfd, snapshot, read_callback); + result = NewIteratorImpl(read_options, cfd, + (read_options.snapshot != nullptr) + ? 
read_options.snapshot->GetSequenceNumber() + : kMaxSequenceNumber, + read_callback); } return result; } @@ -2511,10 +2981,28 @@ ColumnFamilyData* cfd, SequenceNumber snapshot, ReadCallback* read_callback, - bool allow_blob, + bool expose_blob_index, bool allow_refresh) { SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + TEST_SYNC_POINT("DBImpl::NewIterator:1"); + TEST_SYNC_POINT("DBImpl::NewIterator:2"); + + if (snapshot == kMaxSequenceNumber) { + // Note that the snapshot is assigned AFTER referencing the super + // version because otherwise a flush happening in between may compact away + // data for the snapshot, so the reader would see neither data that was be + // visible to the snapshot before compaction nor the newer data inserted + // afterwards. + // Note that the super version might not contain all the data available + // to this snapshot, but in that case it can see all the data in the + // super version, which is a valid consistent state after the user + // calls NewIterator(). + snapshot = versions_->LastSequence(); + TEST_SYNC_POINT("DBImpl::NewIterator:3"); + TEST_SYNC_POINT("DBImpl::NewIterator:4"); + } + // Try to generate a DB iterator tree in continuous memory area to be // cache friendly. Here is an example of result: // +-------------------------------+ @@ -2558,14 +3046,15 @@ // likely that any iterator pointer is close to the iterator it points to so // that they are likely to be in the same cache line and/or page. ArenaWrappedDBIter* db_iter = NewArenaWrappedDbIterator( - env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, snapshot, - sv->mutable_cf_options.max_sequential_skip_in_iterations, - sv->version_number, read_callback, this, cfd, allow_blob, + env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, sv->current, + snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations, + sv->version_number, read_callback, this, cfd, expose_blob_index, read_options.snapshot != nullptr ? 
false : allow_refresh); - InternalIterator* internal_iter = - NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(), - db_iter->GetRangeDelAggregator(), snapshot); + InternalIterator* internal_iter = NewInternalIterator( + db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(), + db_iter->GetRangeDelAggregator(), snapshot, + /* allow_unprepared_value */ true); db_iter->SetIterUnderDBIter(internal_iter); return db_iter; @@ -2591,12 +3080,13 @@ "Tailing iterator not supported in RocksDB lite"); #else for (auto cfh : column_families) { - auto cfd = reinterpret_cast(cfh)->cfd(); + auto cfd = static_cast_with_check(cfh)->cfd(); SuperVersion* sv = cfd->GetReferencedSuperVersion(this); - auto iter = new ForwardIterator(this, read_options, cfd, sv); + auto iter = new ForwardIterator(this, read_options, cfd, sv, + /* allow_unprepared_value */ true); iterators->push_back(NewDBIterator( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, - cfd->user_comparator(), iter, kMaxSequenceNumber, + cfd->user_comparator(), iter, sv->current, kMaxSequenceNumber, sv->mutable_cf_options.max_sequential_skip_in_iterations, read_callback, this, cfd)); } @@ -2610,7 +3100,8 @@ : versions_->LastSequence(); for (size_t i = 0; i < column_families.size(); ++i) { auto* cfd = - reinterpret_cast(column_families[i])->cfd(); + static_cast_with_check(column_families[i]) + ->cfd(); iterators->push_back( NewIteratorImpl(read_options, cfd, snapshot, read_callback)); } @@ -2630,7 +3121,8 @@ SnapshotImpl* DBImpl::GetSnapshotImpl(bool is_write_conflict_boundary, bool lock) { int64_t unix_time = 0; - env_->GetCurrentTime(&unix_time); // Ignore error + immutable_db_options_.clock->GetCurrentTime(&unix_time) + .PermitUncheckedError(); // Ignore error SnapshotImpl* s = new SnapshotImpl; if (lock) { @@ -2656,7 +3148,7 @@ } namespace { -typedef autovector CfdList; +using CfdList = autovector; bool CfdListContains(const CfdList& list, ColumnFamilyData* cfd) { for (const ColumnFamilyData* t : list) { if (t == cfd) { @@ -2668,15 +3160,23 @@ } // namespace void DBImpl::ReleaseSnapshot(const Snapshot* s) { + if (s == nullptr) { + // DBImpl::GetSnapshot() can return nullptr when snapshot + // not supported by specifying the condition: + // inplace_update_support enabled. + return; + } const SnapshotImpl* casted_s = reinterpret_cast(s); { InstrumentedMutexLock l(&mutex_); snapshots_.Delete(casted_s); uint64_t oldest_snapshot; if (snapshots_.empty()) { - oldest_snapshot = last_seq_same_as_publish_seq_ - ? 
versions_->LastSequence() - : versions_->LastPublishedSequence(); + if (last_seq_same_as_publish_seq_) { + oldest_snapshot = versions_->LastSequence(); + } else { + oldest_snapshot = versions_->LastPublishedSequence(); + } } else { oldest_snapshot = snapshots_.oldest()->number_; } @@ -2717,7 +3217,7 @@ #ifndef ROCKSDB_LITE Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, TablePropertiesCollection* props) { - auto cfh = reinterpret_cast(column_family); + auto cfh = static_cast_with_check(column_family); auto cfd = cfh->cfd(); // Increment the ref count @@ -2739,7 +3239,7 @@ Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family, const Range* range, std::size_t n, TablePropertiesCollection* props) { - auto cfh = reinterpret_cast(column_family); + auto cfh = static_cast_with_check(column_family); auto cfd = cfh->cfd(); // Increment the ref count @@ -2765,17 +3265,37 @@ Env* DBImpl::GetEnv() const { return env_; } FileSystem* DB::GetFileSystem() const { - static LegacyFileSystemWrapper fs_wrap(GetEnv()); - return &fs_wrap; + const auto& fs = GetEnv()->GetFileSystem(); + return fs.get(); } FileSystem* DBImpl::GetFileSystem() const { return immutable_db_options_.fs.get(); } +SystemClock* DBImpl::GetSystemClock() const { + return immutable_db_options_.clock; +} + +#ifndef ROCKSDB_LITE + +Status DBImpl::StartIOTrace(const TraceOptions& trace_options, + std::unique_ptr&& trace_writer) { + assert(trace_writer != nullptr); + return io_tracer_->StartIOTrace(GetSystemClock(), trace_options, + std::move(trace_writer)); +} + +Status DBImpl::EndIOTrace() { + io_tracer_->EndIOTrace(); + return Status::OK(); +} + +#endif // ROCKSDB_LITE + Options DBImpl::GetOptions(ColumnFamilyHandle* column_family) const { InstrumentedMutexLock l(&mutex_); - auto cfh = reinterpret_cast(column_family); + auto cfh = static_cast_with_check(column_family); return Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), cfh->cfd()->GetLatestCFOptions()); } @@ -2789,7 +3309,8 @@ const Slice& property, std::string* value) { const DBPropertyInfo* property_info = GetPropertyInfo(property); value->clear(); - auto cfd = reinterpret_cast(column_family)->cfd(); + auto cfd = + static_cast_with_check(column_family)->cfd(); if (property_info == nullptr) { return false; } else if (property_info->handle_int) { @@ -2801,16 +3322,21 @@ } return ret_value; } else if (property_info->handle_string) { - InstrumentedMutexLock l(&mutex_); - return cfd->internal_stats()->GetStringProperty(*property_info, property, - value); + if (property_info->need_out_of_mutex) { + return cfd->internal_stats()->GetStringProperty(*property_info, property, + value); + } else { + InstrumentedMutexLock l(&mutex_); + return cfd->internal_stats()->GetStringProperty(*property_info, property, + value); + } } else if (property_info->handle_string_dbimpl) { - std::string tmp_value; - bool ret_value = (this->*(property_info->handle_string_dbimpl))(&tmp_value); - if (ret_value) { - *value = tmp_value; + if (property_info->need_out_of_mutex) { + return (this->*(property_info->handle_string_dbimpl))(value); + } else { + InstrumentedMutexLock l(&mutex_); + return (this->*(property_info->handle_string_dbimpl))(value); } - return ret_value; } // Shouldn't reach here since exactly one of handle_string and handle_int // should be non-nullptr. 
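A hedged usage sketch of the snapshot lifecycle touched above (illustrative, not from this patch): the new guard makes ReleaseSnapshot(nullptr) a no-op, which matters because GetSnapshot() can return nullptr, for example when inplace_update_support disables snapshots.

    #include <string>
    #include "rocksdb/db.h"

    // Minimal sketch, assuming `db` is open.
    void SnapshotRead(rocksdb::DB* db) {
      const rocksdb::Snapshot* snap = db->GetSnapshot();  // may be nullptr
      rocksdb::ReadOptions ro;
      ro.snapshot = snap;  // nullptr means "read the latest state"
      std::string value;
      db->Get(ro, "key", &value).PermitUncheckedError();
      db->ReleaseSnapshot(snap);  // safe even for nullptr after this change
    }
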
@@ -2823,13 +3349,19 @@ std::map* value) { const DBPropertyInfo* property_info = GetPropertyInfo(property); value->clear(); - auto cfd = reinterpret_cast(column_family)->cfd(); + auto cfd = + static_cast_with_check(column_family)->cfd(); if (property_info == nullptr) { return false; } else if (property_info->handle_map) { - InstrumentedMutexLock l(&mutex_); - return cfd->internal_stats()->GetMapProperty(*property_info, property, - value); + if (property_info->need_out_of_mutex) { + return cfd->internal_stats()->GetMapProperty(*property_info, property, + value); + } else { + InstrumentedMutexLock l(&mutex_); + return cfd->internal_stats()->GetMapProperty(*property_info, property, + value); + } } // If we reach this point it means that handle_map is not provided for the // requested property @@ -2842,7 +3374,8 @@ if (property_info == nullptr || property_info->handle_int == nullptr) { return false; } - auto cfd = reinterpret_cast(column_family)->cfd(); + auto cfd = + static_cast_with_check(column_family)->cfd(); return GetIntPropertyInternal(cfd, *property_info, false, value); } @@ -2860,17 +3393,17 @@ } } else { SuperVersion* sv = nullptr; - if (!is_locked) { - sv = GetAndRefSuperVersion(cfd); - } else { - sv = cfd->GetSuperVersion(); + if (is_locked) { + mutex_.Unlock(); } + sv = GetAndRefSuperVersion(cfd); bool ret = cfd->internal_stats()->GetIntPropertyOutOfMutex( property_info, sv->current, value); - if (!is_locked) { - ReturnAndCleanupSuperVersion(cfd, sv); + ReturnAndCleanupSuperVersion(cfd, sv); + if (is_locked) { + mutex_.Lock(); } return ret; @@ -2879,7 +3412,7 @@ bool DBImpl::GetPropertyHandleOptionsStatistics(std::string* value) { assert(value != nullptr); - Statistics* statistics = immutable_db_options_.statistics.get(); + Statistics* statistics = immutable_db_options_.stats; if (!statistics) { return false; } @@ -2907,23 +3440,28 @@ } uint64_t sum = 0; + bool ret = true; { // Needs mutex to protect the list of column families. InstrumentedMutexLock l(&mutex_); uint64_t value; - for (auto* cfd : *versions_->GetColumnFamilySet()) { + for (auto* cfd : versions_->GetRefedColumnFamilySet()) { if (!cfd->initialized()) { continue; } - if (GetIntPropertyInternal(cfd, *property_info, true, &value)) { + ret = GetIntPropertyInternal(cfd, *property_info, true, &value); + // GetIntPropertyInternal may release db mutex and re-acquire it. 
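For reference, a hedged sketch of the property getters whose locking is reworked above (illustrative caller code; the public surface is unchanged, need_out_of_mutex only decides whether the DB mutex is held internally):

    #include <cstdint>
    #include <string>
    #include "rocksdb/db.h"

    // Minimal sketch, assuming `db` is open; property names are built-ins.
    void DumpProperties(rocksdb::DB* db) {
      std::string stats;
      if (db->GetProperty("rocksdb.stats", &stats)) {
        // handle_string path
      }
      uint64_t mem = 0;
      if (db->GetIntProperty("rocksdb.size-all-mem-tables", &mem)) {
        // handle_int path
      }
      uint64_t total = 0;  // summed across all column families
      db->GetAggregatedIntProperty("rocksdb.estimate-table-readers-mem", &total);
    }
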
+ mutex_.AssertHeld(); + if (ret) { sum += value; } else { - return false; + ret = false; + break; } } } *aggregated_value = sum; - return true; + return ret; } SuperVersion* DBImpl::GetAndRefSuperVersion(ColumnFamilyData* cfd) { @@ -3015,7 +3553,7 @@ uint64_t* const count, uint64_t* const size) { ColumnFamilyHandleImpl* cfh = - reinterpret_cast(column_family); + static_cast_with_check(column_family); ColumnFamilyData* cfd = cfh->cfd(); SuperVersion* sv = GetAndRefSuperVersion(cfd); @@ -3039,16 +3577,34 @@ return Status::InvalidArgument("Invalid options"); } + const Comparator* const ucmp = column_family->GetComparator(); + assert(ucmp); + size_t ts_sz = ucmp->timestamp_size(); + Version* v; - auto cfh = reinterpret_cast(column_family); + auto cfh = static_cast_with_check(column_family); auto cfd = cfh->cfd(); SuperVersion* sv = GetAndRefSuperVersion(cfd); v = sv->current; for (int i = 0; i < n; i++) { + Slice start = range[i].start; + Slice limit = range[i].limit; + + // Add timestamp if needed + std::string start_with_ts, limit_with_ts; + if (ts_sz > 0) { + // Maximum timestamp means including all key with any timestamp + AppendKeyWithMaxTimestamp(&start_with_ts, start, ts_sz); + // Append a maximum timestamp as the range limit is exclusive: + // [start, limit) + AppendKeyWithMaxTimestamp(&limit_with_ts, limit, ts_sz); + start = start_with_ts; + limit = limit_with_ts; + } // Convert user_key into a corresponding internal key. - InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); - InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k1(start, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(limit, kMaxSequenceNumber, kValueTypeForSeek); sizes[i] = 0; if (options.include_files) { sizes[i] += versions_->ApproximateSize( @@ -3100,14 +3656,13 @@ FileType type; WalFileType log_type; if (!ParseFileName(name, &number, &type, &log_type) || - (type != kTableFile && type != kLogFile)) { + (type != kTableFile && type != kWalFile)) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "DeleteFile %s failed.\n", name.c_str()); return Status::InvalidArgument("Invalid file name"); } - Status status; - if (type == kLogFile) { + if (type == kWalFile) { // Only allow deleting archived log files if (log_type != kArchivedLogFile) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, @@ -3115,7 +3670,7 @@ name.c_str()); return Status::NotSupported("Delete only supported for archived logs"); } - status = wal_manager_.DeleteFile(name, number); + Status status = wal_manager_.DeleteFile(name, number); if (!status.ok()) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "DeleteFile %s failed -- %s.\n", name.c_str(), @@ -3124,6 +3679,7 @@ return status; } + Status status; int level; FileMetaData* metadata; ColumnFamilyData* cfd; @@ -3197,8 +3753,8 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family, const RangePtr* ranges, size_t n, bool include_end) { - Status status; - auto cfh = reinterpret_cast(column_family); + Status status = Status::OK(); + auto cfh = static_cast_with_check(column_family); ColumnFamilyData* cfd = cfh->cfd(); VersionEdit edit; std::set deleted_files; @@ -3252,11 +3808,13 @@ deleted_files.insert(level_file); level_file->being_compacted = true; } + vstorage->ComputeCompactionScore(*cfd->ioptions(), + *cfd->GetLatestMutableCFOptions()); } } if (edit.GetDeletedFiles().empty()) { job_context.Clean(); - return Status::OK(); + return status; } input_version->Ref(); status = versions_->LogAndApply(cfd, 
*cfd->GetLatestMutableCFOptions(), @@ -3288,10 +3846,16 @@ versions_->GetLiveFilesMetaData(metadata); } +Status DBImpl::GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) { + InstrumentedMutexLock l(&mutex_); + return versions_->GetLiveFilesChecksumInfo(checksum_list); +} + void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ColumnFamilyMetaData* cf_meta) { assert(column_family); - auto* cfd = reinterpret_cast(column_family)->cfd(); + auto* cfd = + static_cast_with_check(column_family)->cfd(); auto* sv = GetAndRefSuperVersion(cfd); { // Without mutex, Version::GetColumnFamilyMetaData will have data race with @@ -3309,6 +3873,17 @@ ReturnAndCleanupSuperVersion(cfd, sv); } +void DBImpl::GetAllColumnFamilyMetaData( + std::vector* metadata) { + InstrumentedMutexLock l(&mutex_); + for (auto cfd : *(versions_->GetColumnFamilySet())) { + { + metadata->emplace_back(); + cfd->current()->GetColumnFamilyMetaData(&metadata->back()); + } + } +} + #endif // ROCKSDB_LITE Status DBImpl::CheckConsistency() { @@ -3400,13 +3975,48 @@ return s; } - // If last character is '\n' remove it from identity + // If last character is '\n' remove it from identity. (Old implementations + // of Env::GenerateUniqueId() would include a trailing '\n'.) if (identity->size() > 0 && identity->back() == '\n') { identity->pop_back(); } return s; } +Status DBImpl::GetDbSessionId(std::string& session_id) const { + session_id.assign(db_session_id_); + return Status::OK(); +} + +namespace { +SemiStructuredUniqueIdGen* DbSessionIdGen() { + static SemiStructuredUniqueIdGen gen; + return &gen; +} +} // namespace + +void DBImpl::TEST_ResetDbSessionIdGen() { DbSessionIdGen()->Reset(); } + +std::string DBImpl::GenerateDbSessionId(Env*) { + // See SemiStructuredUniqueIdGen for its desirable properties. 
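A hedged sketch of how the identifiers above surface to callers (illustrative, not part of this patch): GetDbIdentity() returns the persistent per-DB id, while the session id generated here changes on every DB::Open.

    #include <string>
    #include "rocksdb/db.h"

    // Minimal sketch, assuming `db` is open.
    void LogIds(rocksdb::DB* db) {
      std::string db_id, session_id;
      db->GetDbIdentity(db_id).PermitUncheckedError();       // stable across opens
      db->GetDbSessionId(session_id).PermitUncheckedError(); // fresh per open
      // e.g. tag metrics or SST tracking with (db_id, session_id)
    }
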
+ auto gen = DbSessionIdGen(); + + uint64_t lo, hi; + gen->GenerateNext(&hi, &lo); + if (lo == 0) { + // Avoid emitting session ID with lo==0, so that SST unique + // IDs can be more easily ensured non-zero + gen->GenerateNext(&hi, &lo); + assert(lo != 0); + } + return EncodeSessionId(hi, lo); +} + +void DBImpl::SetDbSessionId() { + db_session_id_ = GenerateDbSessionId(env_); + TEST_SYNC_POINT_CALLBACK("DBImpl::SetDbSessionId", &db_session_id_); +} + // Default implementation -- returns not supported status Status DB::CreateColumnFamily(const ColumnFamilyOptions& /*cf_options*/, const std::string& /*column_family_name*/, @@ -3437,6 +4047,10 @@ } Status DB::DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family) { + if (DefaultColumnFamily() == column_family) { + return Status::InvalidArgument( + "Cannot destroy the handle returned by DefaultColumnFamily()"); + } delete column_family; return Status::OK(); } @@ -3444,30 +4058,27 @@ DB::~DB() {} Status DBImpl::Close() { - if (!closed_) { - { - InstrumentedMutexLock l(&mutex_); - // If there is unreleased snapshot, fail the close call - if (!snapshots_.empty()) { - return Status::Aborted("Cannot close DB with unreleased snapshot."); - } + InstrumentedMutexLock closing_lock_guard(&closing_mutex_); + if (closed_) { + return closing_status_; + } + { + InstrumentedMutexLock l(&mutex_); + // If there is unreleased snapshot, fail the close call + if (!snapshots_.empty()) { + return Status::Aborted("Cannot close DB with unreleased snapshot."); } - - closed_ = true; - return CloseImpl(); } - return Status::OK(); + closing_status_ = CloseImpl(); + closed_ = true; + return closing_status_; } Status DB::ListColumnFamilies(const DBOptions& db_options, const std::string& name, std::vector* column_families) { - FileSystem* fs = db_options.file_system.get(); - LegacyFileSystemWrapper legacy_fs(db_options.env); - if (!fs) { - fs = &legacy_fs; - } - return VersionSet::ListColumnFamilies(column_families, name, fs); + const std::shared_ptr& fs = db_options.env->GetFileSystem(); + return VersionSet::ListColumnFamilies(column_families, name, fs.get()); } Snapshot::~Snapshot() {} @@ -3477,13 +4088,13 @@ ImmutableDBOptions soptions(SanitizeOptions(dbname, options)); Env* env = soptions.env; std::vector filenames; - bool wal_in_db_path = IsWalDirSameAsDBPath(&soptions); + bool wal_in_db_path = soptions.IsWalDirSameAsDBPath(); // Reset the logger because it holds a handle to the // log file and prevents cleanup and directory removal soptions.info_log.reset(); // Ignore error in case directory does not exist - env->GetChildren(dbname, &filenames); + env->GetChildren(dbname, &filenames).PermitUncheckedError(); FileLock* lock; const std::string lockname = LockFileName(dbname); @@ -3499,57 +4110,53 @@ std::string path_to_delete = dbname + "/" + fname; if (type == kMetaDatabase) { del = DestroyDB(path_to_delete, options); - } else if (type == kTableFile || type == kLogFile) { - del = DeleteDBFile(&soptions, path_to_delete, dbname, - /*force_bg=*/false, /*force_fg=*/!wal_in_db_path); + } else if (type == kTableFile || type == kWalFile || + type == kBlobFile) { + del = DeleteDBFile( + &soptions, path_to_delete, dbname, + /*force_bg=*/false, + /*force_fg=*/(type == kWalFile) ? 
!wal_in_db_path : false); } else { del = env->DeleteFile(path_to_delete); } - if (result.ok() && !del.ok()) { + if (!del.ok() && result.ok()) { result = del; } } } - std::vector paths; - - for (const auto& path : options.db_paths) { - paths.emplace_back(path.path); - } - for (const auto& cf : column_families) { - for (const auto& path : cf.options.cf_paths) { - paths.emplace_back(path.path); + std::set paths; + for (const DbPath& db_path : options.db_paths) { + paths.insert(db_path.path); + } + for (const ColumnFamilyDescriptor& cf : column_families) { + for (const DbPath& cf_path : cf.options.cf_paths) { + paths.insert(cf_path.path); } } - - // Remove duplicate paths. - // Note that we compare only the actual paths but not path ids. - // This reason is that same path can appear at different path_ids - // for different column families. - std::sort(paths.begin(), paths.end()); - paths.erase(std::unique(paths.begin(), paths.end()), paths.end()); - for (const auto& path : paths) { if (env->GetChildren(path, &filenames).ok()) { for (const auto& fname : filenames) { if (ParseFileName(fname, &number, &type) && - type == kTableFile) { // Lock file will be deleted at end - std::string table_path = path + "/" + fname; - Status del = DeleteDBFile(&soptions, table_path, dbname, + (type == kTableFile || + type == kBlobFile)) { // Lock file will be deleted at end + std::string file_path = path + "/" + fname; + Status del = DeleteDBFile(&soptions, file_path, dbname, /*force_bg=*/false, /*force_fg=*/false); - if (result.ok() && !del.ok()) { + if (!del.ok() && result.ok()) { result = del; } } } - env->DeleteDir(path); + // TODO: Should we return an error if we cannot delete the directory? + env->DeleteDir(path).PermitUncheckedError(); } } std::vector walDirFiles; std::string archivedir = ArchivalDirectory(dbname); bool wal_dir_exists = false; - if (dbname != soptions.wal_dir) { + if (!soptions.IsWalDirSameAsDBPath(dbname)) { wal_dir_exists = env->GetChildren(soptions.wal_dir, &walDirFiles).ok(); archivedir = ArchivalDirectory(soptions.wal_dir); } @@ -3561,42 +4168,47 @@ if (env->GetChildren(archivedir, &archiveFiles).ok()) { // Delete archival files. 
for (const auto& file : archiveFiles) { - if (ParseFileName(file, &number, &type) && type == kLogFile) { + if (ParseFileName(file, &number, &type) && type == kWalFile) { Status del = DeleteDBFile(&soptions, archivedir + "/" + file, archivedir, /*force_bg=*/false, /*force_fg=*/!wal_in_db_path); - if (result.ok() && !del.ok()) { + if (!del.ok() && result.ok()) { result = del; } } } - env->DeleteDir(archivedir); + // Ignore error in case dir contains other files + env->DeleteDir(archivedir).PermitUncheckedError(); } // Delete log files in the WAL dir if (wal_dir_exists) { for (const auto& file : walDirFiles) { - if (ParseFileName(file, &number, &type) && type == kLogFile) { + if (ParseFileName(file, &number, &type) && type == kWalFile) { Status del = DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number), soptions.wal_dir, /*force_bg=*/false, /*force_fg=*/!wal_in_db_path); - if (result.ok() && !del.ok()) { + if (!del.ok() && result.ok()) { result = del; } } } - env->DeleteDir(soptions.wal_dir); + // Ignore error in case dir contains other files + env->DeleteDir(soptions.wal_dir).PermitUncheckedError(); } - env->UnlockFile(lock); // Ignore error since state is already gone - env->DeleteFile(lockname); + // Ignore error since state is already gone + env->UnlockFile(lock).PermitUncheckedError(); + env->DeleteFile(lockname).PermitUncheckedError(); // sst_file_manager holds a ref to the logger. Make sure the logger is // gone before trying to remove the directory. soptions.sst_file_manager.reset(); - env->DeleteDir(dbname); // Ignore error in case dir contains other files + // Ignore error in case dir contains other files + env->DeleteDir(dbname).PermitUncheckedError(); + ; } return result; } @@ -3634,11 +4246,13 @@ TEST_SYNC_POINT("DBImpl::WriteOptionsFile:1"); TEST_SYNC_POINT("DBImpl::WriteOptionsFile:2"); + TEST_SYNC_POINT_CALLBACK("DBImpl::WriteOptionsFile:PersistOptions", + &db_options); std::string file_name = TempOptionsFileName(GetName(), versions_->NewFileNumber()); Status s = PersistRocksDBOptions(db_options, cf_names, cf_opts, file_name, - GetFileSystem()); + fs_.get()); if (s.ok()) { s = RenameTempFileToOptionsFile(file_name); @@ -3723,15 +4337,29 @@ uint64_t options_file_number = versions_->NewFileNumber(); std::string options_file_name = OptionsFileName(GetName(), options_file_number); - // Retry if the file name happen to conflict with an existing one. - s = GetEnv()->RenameFile(file_name, options_file_name); + uint64_t options_file_size = 0; + s = GetEnv()->GetFileSize(file_name, &options_file_size); + if (s.ok()) { + // Retry if the file name happen to conflict with an existing one. + s = GetEnv()->RenameFile(file_name, options_file_name); + std::unique_ptr dir_obj; + if (s.ok()) { + s = fs_->NewDirectory(GetName(), IOOptions(), &dir_obj, nullptr); + } + if (s.ok()) { + s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr, + DirFsyncOptions(options_file_name)); + } + } if (s.ok()) { InstrumentedMutexLock l(&mutex_); versions_->options_file_number_ = options_file_number; + versions_->options_file_size_ = options_file_size; } if (0 == disable_delete_obsolete_files_) { - DeleteObsoleteOptionsFiles(); + // TODO: Should we check for errors here? 
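The rename-then-fsync sequence added to RenameTempFileToOptionsFile above follows the standard crash-consistency pattern; a hedged, generic POSIX sketch of the same idea (not RocksDB code):

    #include <cstdio>      // std::rename
    #include <fcntl.h>     // open, O_DIRECTORY
    #include <unistd.h>    // fsync, close

    // Generic sketch: a rename is only durable once the parent directory's
    // entry is flushed, hence the explicit fsync on the directory fd.
    bool DurableRename(const char* from, const char* to, const char* parent_dir) {
      if (std::rename(from, to) != 0) return false;
      int dir_fd = open(parent_dir, O_RDONLY | O_DIRECTORY);
      if (dir_fd < 0) return false;
      bool ok = fsync(dir_fd) == 0;
      close(dir_fd);
      return ok;
    }
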
+ DeleteObsoleteOptionsFiles().PermitUncheckedError(); } return s; #else @@ -3772,16 +4400,17 @@ // // A global method that can dump out the build version void DumpRocksDBBuildVersion(Logger* log) { -#if !defined(IOS_CROSS_COMPILE) - // if we compile with Xcode, we don't run build_detect_version, so we don't - // generate util/build_version.cc - ROCKS_LOG_HEADER(log, "RocksDB version: %d.%d.%d\n", ROCKSDB_MAJOR, - ROCKSDB_MINOR, ROCKSDB_PATCH); - ROCKS_LOG_HEADER(log, "Git sha %s", rocksdb_build_git_sha); - ROCKS_LOG_HEADER(log, "Compile date %s", rocksdb_build_compile_date); -#else - (void)log; // ignore "-Wunused-parameter" -#endif + ROCKS_LOG_HEADER(log, "RocksDB version: %s\n", + GetRocksVersionAsString().c_str()); + const auto& props = GetRocksBuildProperties(); + const auto& sha = props.find("rocksdb_build_git_sha"); + if (sha != props.end()) { + ROCKS_LOG_HEADER(log, "Git sha %s", sha->second.c_str()); + } + const auto date = props.find("rocksdb_build_date"); + if (date != props.end()) { + ROCKS_LOG_HEADER(log, "Compile date %s", date->second.c_str()); + } } #ifndef ROCKSDB_LITE @@ -3798,29 +4427,41 @@ return earliest_seq; } -#endif // ROCKSDB_LITE -#ifndef ROCKSDB_LITE -Status DBImpl::GetLatestSequenceForKey(SuperVersion* sv, const Slice& key, - bool cache_only, - SequenceNumber lower_bound_seq, - SequenceNumber* seq, - bool* found_record_for_key, - bool* is_blob_index) { +Status DBImpl::GetLatestSequenceForKey( + SuperVersion* sv, const Slice& key, bool cache_only, + SequenceNumber lower_bound_seq, SequenceNumber* seq, std::string* timestamp, + bool* found_record_for_key, bool* is_blob_index) { Status s; MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; ReadOptions read_options; SequenceNumber current_seq = versions_->LastSequence(); - LookupKey lkey(key, current_seq); + + ColumnFamilyData* cfd = sv->cfd; + assert(cfd); + const Comparator* const ucmp = cfd->user_comparator(); + assert(ucmp); + size_t ts_sz = ucmp->timestamp_size(); + std::string ts_buf; + if (ts_sz > 0) { + assert(timestamp); + ts_buf.assign(ts_sz, '\xff'); + } else { + assert(!timestamp); + } + Slice ts(ts_buf); + + LookupKey lkey(key, current_seq, ts_sz == 0 ? nullptr : &ts); *seq = kMaxSequenceNumber; *found_record_for_key = false; // Check if there is a record for this key in the latest memtable - sv->mem->Get(lkey, nullptr, &s, &merge_context, &max_covering_tombstone_seq, - seq, read_options, nullptr /*read_callback*/, is_blob_index); + sv->mem->Get(lkey, /*value=*/nullptr, timestamp, &s, &merge_context, + &max_covering_tombstone_seq, seq, read_options, + nullptr /*read_callback*/, is_blob_index); if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { // unexpected error reading memtable. @@ -3830,6 +4471,10 @@ return s; } + assert(!ts_sz || + (*seq != kMaxSequenceNumber && + *timestamp != std::string(ts_sz, '\xff')) || + (*seq == kMaxSequenceNumber && timestamp->empty())); if (*seq != kMaxSequenceNumber) { // Found a sequence number, no need to check immutable memtables @@ -3845,8 +4490,9 @@ } // Check if there is a record for this key in the immutable memtables - sv->imm->Get(lkey, nullptr, &s, &merge_context, &max_covering_tombstone_seq, - seq, read_options, nullptr /*read_callback*/, is_blob_index); + sv->imm->Get(lkey, /*value=*/nullptr, timestamp, &s, &merge_context, + &max_covering_tombstone_seq, seq, read_options, + nullptr /*read_callback*/, is_blob_index); if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { // unexpected error reading memtable. 
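The max-timestamp probe built below in GetLatestSequenceForKey (a ts_sz buffer filled with '\xff') is the internal mirror of what a timestamp-aware reader does through the public API; a hedged sketch, assuming `cf` was created with a comparator whose timestamp_size() > 0:

    #include <string>
    #include "rocksdb/db.h"

    // Minimal sketch; `cf` must use a timestamp-aware comparator, and
    // ReadOptions::timestamp must stay null for column families that do not.
    rocksdb::Status TimestampedGet(rocksdb::DB* db,
                                   rocksdb::ColumnFamilyHandle* cf,
                                   const rocksdb::Slice& key) {
      const size_t ts_sz = cf->GetComparator()->timestamp_size();
      std::string ts_buf(ts_sz, '\xff');  // all-ones: read the newest version
      rocksdb::Slice ts(ts_buf);
      rocksdb::ReadOptions ro;
      ro.timestamp = &ts;
      rocksdb::PinnableSlice value;
      std::string ts_out;  // timestamp of the version actually returned
      return db->Get(ro, cf, key, &value, &ts_out);
    }
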
@@ -3857,6 +4503,11 @@ return s; } + assert(!ts_sz || + (*seq != kMaxSequenceNumber && + *timestamp != std::string(ts_sz, '\xff')) || + (*seq == kMaxSequenceNumber && timestamp->empty())); + if (*seq != kMaxSequenceNumber) { // Found a sequence number, no need to check memtable history *found_record_for_key = true; @@ -3871,9 +4522,9 @@ } // Check if there is a record for this key in the immutable memtables - sv->imm->GetFromHistory(lkey, nullptr, &s, &merge_context, - &max_covering_tombstone_seq, seq, read_options, - is_blob_index); + sv->imm->GetFromHistory(lkey, /*value=*/nullptr, timestamp, &s, + &merge_context, &max_covering_tombstone_seq, seq, + read_options, is_blob_index); if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) { // unexpected error reading memtable. @@ -3885,8 +4536,13 @@ return s; } + assert(!ts_sz || + (*seq != kMaxSequenceNumber && + *timestamp != std::string(ts_sz, '\xff')) || + (*seq == kMaxSequenceNumber && timestamp->empty())); if (*seq != kMaxSequenceNumber) { // Found a sequence number, no need to check SST files + assert(0 == ts_sz || *timestamp != std::string(ts_sz, '\xff')); *found_record_for_key = true; return Status::OK(); } @@ -3899,8 +4555,10 @@ // SST files if cache_only=true? if (!cache_only) { // Check tables - sv->current->Get(read_options, lkey, nullptr, &s, &merge_context, - &max_covering_tombstone_seq, nullptr /* value_found */, + PinnedIteratorsManager pinned_iters_mgr; + sv->current->Get(read_options, lkey, /*value=*/nullptr, timestamp, &s, + &merge_context, &max_covering_tombstone_seq, + &pinned_iters_mgr, nullptr /* value_found */, found_record_for_key, seq, nullptr /*read_callback*/, is_blob_index); @@ -3944,7 +4602,7 @@ } } // Ingest multiple external SST files atomically. - size_t num_cfs = args.size(); + const size_t num_cfs = args.size(); for (size_t i = 0; i != num_cfs; ++i) { if (args[i].external_files.empty()) { char err_msg[128] = {0}; @@ -3981,14 +4639,11 @@ std::vector ingestion_jobs; for (const auto& arg : args) { auto* cfd = static_cast(arg.column_family)->cfd(); - ingestion_jobs.emplace_back( - env_, versions_.get(), cfd, immutable_db_options_, file_options_, - &snapshots_, arg.options, &directories_, &event_logger_); - } - std::vector> exec_results; - for (size_t i = 0; i != num_cfs; ++i) { - exec_results.emplace_back(false, Status::OK()); + ingestion_jobs.emplace_back(versions_.get(), cfd, immutable_db_options_, + file_options_, &snapshots_, arg.options, + &directories_, &event_logger_, io_tracer_); } + // TODO(yanqin) maybe make jobs run in parallel uint64_t start_file_number = next_file_number; for (size_t i = 1; i != num_cfs; ++i) { @@ -3996,9 +4651,14 @@ auto* cfd = static_cast(args[i].column_family)->cfd(); SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); - exec_results[i].second = ingestion_jobs[i].Prepare( - args[i].external_files, start_file_number, super_version); - exec_results[i].first = true; + Status es = ingestion_jobs[i].Prepare( + args[i].external_files, args[i].files_checksums, + args[i].files_checksum_func_names, args[i].file_temperature, + start_file_number, super_version); + // capture first error only + if (!es.ok() && status.ok()) { + status = es; + } CleanupSuperVersion(super_version); } TEST_SYNC_POINT("DBImpl::IngestExternalFiles:BeforeLastJobPrepare:0"); @@ -4007,22 +4667,18 @@ auto* cfd = static_cast(args[0].column_family)->cfd(); SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); - exec_results[0].second = ingestion_jobs[0].Prepare( - 
args[0].external_files, next_file_number, super_version); - exec_results[0].first = true; - CleanupSuperVersion(super_version); - } - for (const auto& exec_result : exec_results) { - if (!exec_result.second.ok()) { - status = exec_result.second; - break; + Status es = ingestion_jobs[0].Prepare( + args[0].external_files, args[0].files_checksums, + args[0].files_checksum_func_names, args[0].file_temperature, + next_file_number, super_version); + if (!es.ok()) { + status = es; } + CleanupSuperVersion(super_version); } if (!status.ok()) { for (size_t i = 0; i != num_cfs; ++i) { - if (exec_results[i].first) { - ingestion_jobs[i].Cleanup(status); - } + ingestion_jobs[i].Cleanup(status); } InstrumentedMutexLock l(&mutex_); ReleaseFileNumberFromPendingOutputs(pending_output_elem); @@ -4122,14 +4778,11 @@ if (status.ok()) { int consumed_seqno_count = ingestion_jobs[0].ConsumedSequenceNumbersCount(); -#ifndef NDEBUG for (size_t i = 1; i != num_cfs; ++i) { - assert(!!consumed_seqno_count == - !!ingestion_jobs[i].ConsumedSequenceNumbersCount()); - consumed_seqno_count += - ingestion_jobs[i].ConsumedSequenceNumbersCount(); + consumed_seqno_count = + std::max(consumed_seqno_count, + ingestion_jobs[i].ConsumedSequenceNumbersCount()); } -#endif if (consumed_seqno_count > 0) { const SequenceNumber last_seqno = versions_->LastSequence(); versions_->SetLastAllocatedSequence(last_seqno + consumed_seqno_count); @@ -4184,6 +4837,15 @@ #endif // !NDEBUG } } + } else if (versions_->io_status().IsIOError()) { + // Error while writing to MANIFEST. + // In fact, versions_->io_status() can also be the result of renaming + // CURRENT file. With current code, it's just difficult to tell. So just + // be pessimistic and try write to a new MANIFEST. + // TODO: distinguish between MANIFEST write and CURRENT renaming + const IOStatus& io_s = versions_->io_status(); + // Should handle return error? + error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite); } // Resume writes to the DB @@ -4243,11 +4905,11 @@ } // Import sst files from metadata. - auto cfh = reinterpret_cast(*handle); + auto cfh = static_cast_with_check(*handle); auto cfd = cfh->cfd(); - ImportColumnFamilyJob import_job(env_, versions_.get(), cfd, - immutable_db_options_, file_options_, - import_options, metadata.files); + ImportColumnFamilyJob import_job(versions_.get(), cfd, immutable_db_options_, + file_options_, import_options, + metadata.files, io_tracer_); SuperVersionContext dummy_sv_ctx(/* create_superversion */ true); VersionEdit dummy_edit; @@ -4338,15 +5000,49 @@ import_job.Cleanup(status); if (!status.ok()) { - DropColumnFamily(*handle); - DestroyColumnFamilyHandle(*handle); + Status temp_s = DropColumnFamily(*handle); + if (!temp_s.ok()) { + ROCKS_LOG_ERROR(immutable_db_options_.info_log, + "DropColumnFamily failed with error %s", + temp_s.ToString().c_str()); + } + // Always returns Status::OK() + temp_s = DestroyColumnFamilyHandle(*handle); + assert(temp_s.ok()); *handle = nullptr; } return status; } +Status DBImpl::VerifyFileChecksums(const ReadOptions& read_options) { + return VerifyChecksumInternal(read_options, /*use_file_checksum=*/true); +} + Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { + return VerifyChecksumInternal(read_options, /*use_file_checksum=*/false); +} + +Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, + bool use_file_checksum) { + // `bytes_read` stat is enabled based on compile-time support and cannot + // be dynamically toggled. 
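For context, a hedged sketch of the two verification modes implemented in VerifyChecksumInternal below (illustrative caller code; GetFileChecksumGenCrc32cFactory is the stock CRC32C factory):

    #include "rocksdb/db.h"
    #include "rocksdb/file_checksum.h"
    #include "rocksdb/options.h"

    // Minimal sketch: full-file checksums must be enabled at write time via
    // options.file_checksum_gen_factory, or VerifyFileChecksums() has nothing
    // recorded in the MANIFEST to compare against.
    rocksdb::Status VerifyDb(const std::string& path) {
      rocksdb::Options options;
      options.create_if_missing = true;
      options.file_checksum_gen_factory =
          rocksdb::GetFileChecksumGenCrc32cFactory();
      rocksdb::DB* db = nullptr;
      rocksdb::Status s = rocksdb::DB::Open(options, path, &db);
      if (!s.ok()) return s;
      s = db->VerifyFileChecksums(rocksdb::ReadOptions());  // whole-file mode
      if (s.ok()) {
        s = db->VerifyChecksum(rocksdb::ReadOptions());     // per-block mode
      }
      delete db;
      return s;
    }
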
So we do not need to worry about `PerfLevel` + // here, unlike many other `IOStatsContext` / `PerfContext` stats. + uint64_t prev_bytes_read = IOSTATS(bytes_read); + Status s; + + if (use_file_checksum) { + FileChecksumGenFactory* const file_checksum_gen_factory = + immutable_db_options_.file_checksum_gen_factory.get(); + if (!file_checksum_gen_factory) { + s = Status::InvalidArgument( + "Cannot verify file checksum if options.file_checksum_gen_factory is " + "null"); + return s; + } + } + + // TODO: simplify using GetRefedColumnFamilySet? std::vector cfd_list; { InstrumentedMutexLock l(&mutex_); @@ -4361,11 +5057,12 @@ for (auto cfd : cfd_list) { sv_list.push_back(cfd->GetReferencedSuperVersion(this)); } + for (auto& sv : sv_list) { VersionStorageInfo* vstorage = sv->current->storage_info(); ColumnFamilyData* cfd = sv->current->cfd(); Options opts; - { + if (!use_file_checksum) { InstrumentedMutexLock l(&mutex_); opts = Options(BuildDBOptions(immutable_db_options_, mutable_db_options_), cfd->GetLatestCFOptions()); @@ -4373,17 +5070,50 @@ for (int i = 0; i < vstorage->num_non_empty_levels() && s.ok(); i++) { for (size_t j = 0; j < vstorage->LevelFilesBrief(i).num_files && s.ok(); j++) { - const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd; + const auto& fd_with_krange = vstorage->LevelFilesBrief(i).files[j]; + const auto& fd = fd_with_krange.fd; + const FileMetaData* fmeta = fd_with_krange.file_metadata; + assert(fmeta); std::string fname = TableFileName(cfd->ioptions()->cf_paths, fd.GetNumber(), fd.GetPathId()); - s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, - read_options, fname); + if (use_file_checksum) { + s = VerifyFullFileChecksum(fmeta->file_checksum, + fmeta->file_checksum_func_name, fname, + read_options); + } else { + s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_, + read_options, fname); + } + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + prev_bytes_read = IOSTATS(bytes_read); + } + } + + if (s.ok() && use_file_checksum) { + const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& pair : blob_files) { + const uint64_t blob_file_number = pair.first; + const auto& meta = pair.second; + assert(meta); + const std::string blob_file_name = BlobFileName( + cfd->ioptions()->cf_paths.front().path, blob_file_number); + s = VerifyFullFileChecksum(meta->GetChecksumValue(), + meta->GetChecksumMethod(), blob_file_name, + read_options); + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + prev_bytes_read = IOSTATS(bytes_read); + if (!s.ok()) { + break; + } } } if (!s.ok()) { break; } } + bool defer_purge = immutable_db_options().avoid_unnecessary_blocking_io; { @@ -4405,6 +5135,38 @@ cfd->UnrefAndTryDelete(); } } + RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, + IOSTATS(bytes_read) - prev_bytes_read); + return s; +} + +Status DBImpl::VerifyFullFileChecksum(const std::string& file_checksum_expected, + const std::string& func_name_expected, + const std::string& fname, + const ReadOptions& read_options) { + Status s; + if (file_checksum_expected == kUnknownFileChecksum) { + return s; + } + std::string file_checksum; + std::string func_name; + s = ROCKSDB_NAMESPACE::GenerateOneFileChecksum( + fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(), + func_name_expected, &file_checksum, &func_name, + read_options.readahead_size, immutable_db_options_.allow_mmap_reads, + io_tracer_, immutable_db_options_.rate_limiter.get()); + if (s.ok()) { 
+ assert(func_name_expected == func_name); + if (file_checksum != file_checksum_expected) { + std::ostringstream oss; + oss << fname << " file checksum mismatch, "; + oss << "expecting " + << Slice(file_checksum_expected).ToString(/*hex=*/true); + oss << ", but actual " << Slice(file_checksum).ToString(/*hex=*/true); + s = Status::Corruption(oss.str()); + TEST_SYNC_POINT_CALLBACK("DBImpl::VerifyFullFileChecksum:mismatch", &s); + } + } return s; } @@ -4437,7 +5199,8 @@ Status DBImpl::StartTrace(const TraceOptions& trace_options, std::unique_ptr&& trace_writer) { InstrumentedMutexLock lock(&trace_mutex_); - tracer_.reset(new Tracer(env_, trace_options, std::move(trace_writer))); + tracer_.reset(new Tracer(immutable_db_options_.clock, trace_options, + std::move(trace_writer))); return Status::OK(); } @@ -4448,16 +5211,24 @@ s = tracer_->Close(); tracer_.reset(); } else { - return Status::IOError("No trace file to close"); + s = Status::IOError("No trace file to close"); } return s; } +Status DBImpl::NewDefaultReplayer( + const std::vector& handles, + std::unique_ptr&& reader, + std::unique_ptr* replayer) { + replayer->reset(new ReplayerImpl(this, handles, std::move(reader))); + return Status::OK(); +} + Status DBImpl::StartBlockCacheTrace( const TraceOptions& trace_options, std::unique_ptr&& trace_writer) { - return block_cache_tracer_.StartTrace(env_, trace_options, - std::move(trace_writer)); + return block_cache_tracer_.StartTrace(immutable_db_options_.clock, + trace_options, std::move(trace_writer)); } Status DBImpl::EndBlockCacheTrace() { @@ -4465,24 +5236,27 @@ return Status::OK(); } -Status DBImpl::TraceIteratorSeek(const uint32_t& cf_id, const Slice& key) { +Status DBImpl::TraceIteratorSeek(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, + const Slice upper_bound) { Status s; if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { - s = tracer_->IteratorSeek(cf_id, key); + s = tracer_->IteratorSeek(cf_id, key, lower_bound, upper_bound); } } return s; } -Status DBImpl::TraceIteratorSeekForPrev(const uint32_t& cf_id, - const Slice& key) { +Status DBImpl::TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, + const Slice upper_bound) { Status s; if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { - s = tracer_->IteratorSeekForPrev(cf_id, key); + s = tracer_->IteratorSeekForPrev(cf_id, key, lower_bound, upper_bound); } } return s; diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/db_impl.h mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl.h --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/db_impl.h 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl.h 2025-05-19 16:14:27.000000000 +0000 @@ -20,8 +20,8 @@ #include #include "db/column_family.h" +#include "db/compaction/compaction_iterator.h" #include "db/compaction/compaction_job.h" -#include "db/dbformat.h" #include "db/error_handler.h" #include "db/event_helpers.h" #include "db/external_sst_file_ingestion_job.h" @@ -50,12 +50,16 @@ #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/status.h" +#ifndef ROCKSDB_LITE #include "rocksdb/trace_reader_writer.h" +#endif // ROCKSDB_LITE #include "rocksdb/transaction_log.h" +#ifndef ROCKSDB_LITE +#include "rocksdb/utilities/replayer.h" +#endif // ROCKSDB_LITE #include "rocksdb/write_buffer_manager.h" +#include "table/merging_iterator.h" #include "table/scoped_arena_iterator.h" -#include 
"trace_replay/block_cache_tracer.h" -#include "trace_replay/trace_replay.h" #include "util/autovector.h" #include "util/hash.h" #include "util/repeatable_thread.h" @@ -69,6 +73,10 @@ class InMemoryStatsHistoryIterator; class MemTable; class PersistentStatsHistoryIterator; +class PeriodicWorkScheduler; +#ifndef NDEBUG +class PeriodicWorkTestScheduler; +#endif // !NDEBUG class TableCache; class TaskLimiterToken; class Version; @@ -82,13 +90,13 @@ // Class to maintain directories for all database paths other than main one. class Directories { public: - Status SetDirectories(Env* env, const std::string& dbname, - const std::string& wal_dir, - const std::vector& data_paths); + IOStatus SetDirectories(FileSystem* fs, const std::string& dbname, + const std::string& wal_dir, + const std::vector& data_paths); - Directory* GetDataDir(size_t path_id) const { + FSDirectory* GetDataDir(size_t path_id) const { assert(path_id < data_dirs_.size()); - Directory* ret_dir = data_dirs_[path_id].get(); + FSDirectory* ret_dir = data_dirs_[path_id].get(); if (ret_dir == nullptr) { // Should use db_dir_ return db_dir_.get(); @@ -96,19 +104,19 @@ return ret_dir; } - Directory* GetWalDir() { + FSDirectory* GetWalDir() { if (wal_dir_) { return wal_dir_.get(); } return db_dir_.get(); } - Directory* GetDbDir() { return db_dir_.get(); } + FSDirectory* GetDbDir() { return db_dir_.get(); } private: - std::unique_ptr db_dir_; - std::vector> data_dirs_; - std::unique_ptr wal_dir_; + std::unique_ptr db_dir_; + std::vector> data_dirs_; + std::unique_ptr wal_dir_; }; // While DB is the public interface of RocksDB, and DBImpl is the actual @@ -127,7 +135,8 @@ class DBImpl : public DB { public: DBImpl(const DBOptions& options, const std::string& dbname, - const bool seq_per_batch = false, const bool batch_per_txn = true); + const bool seq_per_batch = false, const bool batch_per_txn = true, + bool read_only = false); // No copying allowed DBImpl(const DBImpl&) = delete; void operator=(const DBImpl&) = delete; @@ -163,6 +172,9 @@ virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; + virtual Status Get(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) override; using DB::GetMergeOperands; Status GetMergeOperands(const ReadOptions& options, @@ -185,6 +197,11 @@ const std::vector& column_family, const std::vector& keys, std::vector* values) override; + virtual std::vector MultiGet( + const ReadOptions& options, + const std::vector& column_family, + const std::vector& keys, std::vector* values, + std::vector* timestamps) override; // This MultiGet is a batched version, which may be faster than calling Get // multiple times, especially if the keys have some spatial locality that @@ -198,11 +215,22 @@ const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input = false) override; + virtual void MultiGet(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const size_t num_keys, const Slice* keys, + PinnableSlice* values, std::string* timestamps, + Status* statuses, + const bool sorted_input = false) override; virtual void MultiGet(const ReadOptions& options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input = false) override; + virtual void MultiGet(const ReadOptions& options, const size_t num_keys, + ColumnFamilyHandle** 
column_families, const Slice* keys, + PinnableSlice* values, std::string* timestamps, + Status* statuses, + const bool sorted_input = false) override; virtual void MultiGetWithCallback( const ReadOptions& options, ColumnFamilyHandle* column_family, @@ -230,7 +258,7 @@ using DB::KeyMayExist; virtual bool KeyMayExist(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, - std::string* value, + std::string* value, std::string* timestamp, bool* value_found = nullptr) override; using DB::NewIterator; @@ -327,16 +355,34 @@ virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) override; + // IncreaseFullHistoryTsLow(ColumnFamilyHandle*, std::string) will acquire + // and release db_mutex + Status IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family, + std::string ts_low) override; + + // GetFullHistoryTsLow(ColumnFamilyHandle*, std::string*) will acquire and + // release db_mutex + Status GetFullHistoryTsLow(ColumnFamilyHandle* column_family, + std::string* ts_low) override; + virtual Status GetDbIdentity(std::string& identity) const override; virtual Status GetDbIdentityFromIdentityFile(std::string* identity) const; + virtual Status GetDbSessionId(std::string& session_id) const override; + ColumnFamilyHandle* DefaultColumnFamily() const override; ColumnFamilyHandle* PersistentStatsColumnFamily() const; virtual Status Close() override; + virtual Status DisableFileDeletions() override; + + virtual Status EnableFileDeletions(bool force) override; + + virtual bool IsFileDeletionsEnabled() const; + Status GetStatsHistory( uint64_t start_time, uint64_t end_time, std::unique_ptr* stats_iterator) override; @@ -344,9 +390,6 @@ #ifndef ROCKSDB_LITE using DB::ResetStats; virtual Status ResetStats() override; - virtual Status DisableFileDeletions() override; - virtual Status EnableFileDeletions(bool force) override; - virtual int IsFileDeletionsEnabled() const; // All the returned filenames start with "/" virtual Status GetLiveFiles(std::vector&, uint64_t* manifest_file_size, @@ -369,13 +412,21 @@ virtual void GetLiveFilesMetaData( std::vector* metadata) override; + virtual Status GetLiveFilesChecksumInfo( + FileChecksumList* checksum_list) override; + + virtual Status GetLiveFilesStorageInfo( + const LiveFilesStorageInfoOptions& opts, + std::vector* files) override; + // Obtains the meta data of the specified column family of the DB. - // Status::NotFound() will be returned if the current DB does not have - // any column family match the specified name. // TODO(yhchiang): output parameter is placed in the end in this codebase. virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family, ColumnFamilyMetaData* metadata) override; + void GetAllColumnFamilyMetaData( + std::vector* metadata) override; + Status SuggestCompactRange(ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) override; @@ -399,8 +450,29 @@ const ExportImportFilesMetaData& metadata, ColumnFamilyHandle** handle) override; + using DB::VerifyFileChecksums; + Status VerifyFileChecksums(const ReadOptions& read_options) override; + using DB::VerifyChecksum; virtual Status VerifyChecksum(const ReadOptions& /*read_options*/) override; + // Verify the checksums of files in db. Currently only tables are checked. + // + // read_options: controls file I/O behavior, e.g. read ahead size while + // reading all the live table files. + // + // use_file_checksum: if false, verify the block checksums of all live table + // in db. 
Otherwise, obtain the file checksums and compare + // with the MANIFEST. Currently, file checksums are + // recomputed by reading all table files. + // + // Returns: OK if there is no file whose file or block checksum mismatches. + Status VerifyChecksumInternal(const ReadOptions& read_options, + bool use_file_checksum); + + Status VerifyFullFileChecksum(const std::string& file_checksum_expected, + const std::string& func_name_expected, + const std::string& fpath, + const ReadOptions& read_options); using DB::StartTrace; virtual Status StartTrace( @@ -410,6 +482,12 @@ using DB::EndTrace; virtual Status EndTrace() override; + using DB::NewDefaultReplayer; + virtual Status NewDefaultReplayer( + const std::vector& handles, + std::unique_ptr&& reader, + std::unique_ptr* replayer) override; + using DB::StartBlockCacheTrace; Status StartBlockCacheTrace( const TraceOptions& options, @@ -418,6 +496,13 @@ using DB::EndBlockCacheTrace; Status EndBlockCacheTrace() override; + using DB::StartIOTrace; + Status StartIOTrace(const TraceOptions& options, + std::unique_ptr&& trace_writer) override; + + using DB::EndIOTrace; + Status EndIOTrace() override; + using DB::GetPropertiesOfAllTables; virtual Status GetPropertiesOfAllTables( ColumnFamilyHandle* column_family, @@ -429,10 +514,12 @@ #endif // ROCKSDB_LITE // ---- End of implementations of the DB interface ---- + SystemClock* GetSystemClock() const; struct GetImplOptions { ColumnFamilyHandle* column_family = nullptr; PinnableSlice* value = nullptr; + std::string* timestamp = nullptr; bool* value_found = nullptr; ReadCallback* callback = nullptr; bool* is_blob_index = nullptr; @@ -455,13 +542,14 @@ // If get_impl_options.get_value = false get merge operands associated with // get_impl_options.key via get_impl_options.merge_operands Status GetImpl(const ReadOptions& options, const Slice& key, - GetImplOptions get_impl_options); + GetImplOptions& get_impl_options); + // If `snapshot` == kMaxSequenceNumber, set a recent one inside the file. ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& options, ColumnFamilyData* cfd, SequenceNumber snapshot, ReadCallback* read_callback, - bool allow_blob = false, + bool expose_blob_index = false, bool allow_refresh = true); virtual SequenceNumber GetLastPublishedSequence() const { @@ -504,9 +592,15 @@ // in the memtables, including memtable history. If cache_only is false, // SST files will also be checked. // + // `key` should NOT have user-defined timestamp appended to user key even if + // timestamp is enabled. + // // If a key is found, *found_record_for_key will be set to true and // *seq will be set to the stored sequence number for the latest - // operation on this key or kMaxSequenceNumber if unknown. + // operation on this key or kMaxSequenceNumber if unknown. If user-defined + // timestamp is enabled for this column family and timestamp is not nullptr, + // then *timestamp will be set to the stored timestamp for the latest + // operation on this key. // If no key is found, *found_record_for_key will be set to false. 
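// Illustrative sketch, not part of the patch: the hunks above add whole-file
// checksum verification. DBImpl::VerifyFullFileChecksum() recomputes a live
// table file's checksum and compares it with the value recorded in the
// MANIFEST, reporting mismatches as Status::Corruption. Caller-side usage,
// assuming the bundled RocksDB exposes GetFileChecksumGenCrc32cFactory() and
// the DB::VerifyFileChecksums() override declared above; the DB path is
// hypothetical.
#include <cassert>
#include "rocksdb/db.h"
#include "rocksdb/file_checksum.h"
static void VerifyFileChecksumsSketch() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Record a full-file CRC32C checksum for every newly written SST.
  options.file_checksum_gen_factory =
      rocksdb::GetFileChecksumGenCrc32cFactory();
  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/checksum_demo", &db);
  assert(s.ok());
  // Recompute and compare each live table file's checksum; a mismatch comes
  // back through the VerifyFullFileChecksum() path added above.
  s = db->VerifyFileChecksums(rocksdb::ReadOptions());
  assert(s.ok());
  delete db;
}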
// // Note: If cache_only=false, it is possible for *seq to be set to 0 if @@ -530,12 +624,15 @@ Status GetLatestSequenceForKey(SuperVersion* sv, const Slice& key, bool cache_only, SequenceNumber lower_bound_seq, - SequenceNumber* seq, + SequenceNumber* seq, std::string* timestamp, bool* found_record_for_key, - bool* is_blob_index = nullptr); + bool* is_blob_index); - Status TraceIteratorSeek(const uint32_t& cf_id, const Slice& key); - Status TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key); + Status TraceIteratorSeek(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, const Slice upper_bound); + Status TraceIteratorSeekForPrev(const uint32_t& cf_id, const Slice& key, + const Slice& lower_bound, + const Slice upper_bound); #endif // ROCKSDB_LITE // Similar to GetSnapshot(), but also lets the db know that this snapshot @@ -561,9 +658,16 @@ // Return an internal iterator over the current state of the database. // The keys of this iterator are internal keys (see format.h). // The returned iterator should be deleted when no longer needed. + // If allow_unprepared_value is true, the returned iterator may defer reading + // the value and so will require PrepareValue() to be called before value(); + // allow_unprepared_value = false is convenient when this optimization is not + // useful, e.g. when reading the whole column family. + // @param read_options Must outlive the returned iterator. InternalIterator* NewInternalIterator( - Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence, - ColumnFamilyHandle* column_family = nullptr); + const ReadOptions& read_options, Arena* arena, + RangeDelAggregator* range_del_agg, SequenceNumber sequence, + ColumnFamilyHandle* column_family = nullptr, + bool allow_unprepared_value = false); LogsWithPrepTracker* logs_with_prep_tracker() { return &logs_with_prep_tracker_; @@ -687,9 +791,14 @@ const WriteController& write_controller() { return write_controller_; } - InternalIterator* NewInternalIterator( - const ReadOptions&, ColumnFamilyData* cfd, SuperVersion* super_version, - Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence); + // @param read_options Must outlive the returned iterator. + InternalIterator* NewInternalIterator(const ReadOptions& read_options, + ColumnFamilyData* cfd, + SuperVersion* super_version, + Arena* arena, + RangeDelAggregator* range_del_agg, + SequenceNumber sequence, + bool allow_unprepared_value); // hollow transactions shell used for recovery. // these will then be passed to TransactionDB so that @@ -817,8 +926,8 @@ InstrumentedMutex* mutex() const { return &mutex_; } // Initialize a brand new DB. The DB directory is expected to be empty before - // calling it. - Status NewDB(); + // calling it. Push new manifest file name into `new_filenames`. + Status NewDB(std::vector* new_filenames); // This is to be used only by internal rocksdb classes. 
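// Illustrative sketch, not part of the patch: with the replayer plumbing
// added above, a workload can be traced and later replayed; iterator seeks
// are now traced together with their lower/upper bounds (TraceIteratorSeek /
// TraceIteratorSeekForPrev). Replayer::Prepare()/Replay() names follow the
// rocksdb/utilities/replayer.h header this diff starts including; the trace
// path is hypothetical and error handling is elided.
#include <memory>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/trace_reader_writer.h"
#include "rocksdb/utilities/replayer.h"
static void TraceAndReplaySketch(rocksdb::DB* db) {
  const std::string trace_file = "/tmp/query_trace";
  std::unique_ptr<rocksdb::TraceWriter> writer;
  rocksdb::NewFileTraceWriter(rocksdb::Env::Default(), rocksdb::EnvOptions(),
                              trace_file, &writer);
  db->StartTrace(rocksdb::TraceOptions(), std::move(writer));
  // ... run the workload to capture (Gets, Puts, iterator seeks) ...
  db->EndTrace();
  std::unique_ptr<rocksdb::TraceReader> reader;
  rocksdb::NewFileTraceReader(rocksdb::Env::Default(), rocksdb::EnvOptions(),
                              trace_file, &reader);
  std::unique_ptr<rocksdb::Replayer> replayer;
  db->NewDefaultReplayer({db->DefaultColumnFamily()}, std::move(reader),
                         &replayer);
  replayer->Prepare();
  replayer->Replay(rocksdb::ReplayOptions(), /*result_callback=*/nullptr);
}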
static Status Open(const DBOptions& db_options, const std::string& name, @@ -826,8 +935,9 @@ std::vector* handles, DB** dbptr, const bool seq_per_batch, const bool batch_per_txn); - static Status CreateAndNewDirectory(Env* env, const std::string& dirname, - std::unique_ptr* directory); + static IOStatus CreateAndNewDirectory( + FileSystem* fs, const std::string& dirname, + std::unique_ptr* directory); // find stats map from stats_history_ with smallest timestamp in // the range of [start_time, end_time) @@ -842,13 +952,15 @@ int max_entries_to_print, std::string* out_str); + VersionSet* GetVersionSet() const { return versions_.get(); } + #ifndef NDEBUG // Compact any files in the named level that overlap [*begin, *end] Status TEST_CompactRange(int level, const Slice* begin, const Slice* end, ColumnFamilyHandle* column_family = nullptr, bool disallow_trivial_move = false); - void TEST_SwitchWAL(); + Status TEST_SwitchWAL(); bool TEST_UnableToReleaseOldestLog() { return unable_to_release_oldest_log_; } @@ -872,6 +984,9 @@ Status TEST_AtomicFlushMemTables(const autovector& cfds, const FlushOptions& flush_opts); + // Wait for background threads to complete scheduled work. + Status TEST_WaitForBackgroundWork(); + // Wait for memtable compaction Status TEST_WaitForFlushMemTable(ColumnFamilyHandle* column_family = nullptr); @@ -880,9 +995,15 @@ // is only for the special test of CancelledCompactions Status TEST_WaitForCompact(bool waitUnscheduled = false); + // Wait for any background purge + Status TEST_WaitForPurge(); + + // Get the background error status + Status TEST_GetBGError(); + // Return the maximum overlapping data (in bytes) at next level for any // file at a level >= 1. - int64_t TEST_MaxNextLevelOverlappingBytes( + uint64_t TEST_MaxNextLevelOverlappingBytes( ColumnFamilyHandle* column_family = nullptr); // Return the current manifest file no. @@ -894,8 +1015,10 @@ // get total level0 file size. Only for testing. uint64_t TEST_GetLevel0TotalSize(); - void TEST_GetFilesMetaData(ColumnFamilyHandle* column_family, - std::vector>* metadata); + void TEST_GetFilesMetaData( + ColumnFamilyHandle* column_family, + std::vector>* metadata, + std::vector>* blob_metadata = nullptr); void TEST_LockMutex(); @@ -938,22 +1061,104 @@ int TEST_BGCompactionsAllowed() const; int TEST_BGFlushesAllowed() const; size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; - void TEST_WaitForDumpStatsRun(std::function callback) const; - void TEST_WaitForPersistStatsRun(std::function callback) const; - bool TEST_IsPersistentStatsEnabled() const; + void TEST_WaitForStatsDumpRun(std::function callback) const; size_t TEST_EstimateInMemoryStatsHistorySize() const; + + uint64_t TEST_GetCurrentLogNumber() const { + InstrumentedMutexLock l(mutex()); + assert(!logs_.empty()); + return logs_.back().number; + } + + const std::unordered_set& TEST_GetFilesGrabbedForPurge() const { + return files_grabbed_for_purge_; + } + +#ifndef ROCKSDB_LITE + PeriodicWorkTestScheduler* TEST_GetPeriodicWorkScheduler() const; +#endif // !ROCKSDB_LITE + #endif // NDEBUG + // persist stats to column family "_persistent_stats" + void PersistStats(); + + // dump rocksdb.stats to LOG + void DumpStats(); + + // flush LOG out of application buffer + void FlushInfoLog(); + + // Interface to block and signal the DB in case of stalling writes by + // WriteBufferManager. Each DBImpl object contains ptr to WBMStallInterface. + // When DB needs to be blocked or signalled by WriteBufferManager, + // state_ is changed accordingly. 
+ class WBMStallInterface : public StallInterface { + public: + enum State { + BLOCKED = 0, + RUNNING, + }; + + WBMStallInterface() : state_cv_(&state_mutex_) { + MutexLock lock(&state_mutex_); + state_ = State::RUNNING; + } + + void SetState(State state) { + MutexLock lock(&state_mutex_); + state_ = state; + } + + // Change the state_ to State::BLOCKED and wait until its state is + // changed by WriteBufferManager. When stall is cleared, Signal() is + // called to change the state and unblock the DB. + void Block() override { + MutexLock lock(&state_mutex_); + while (state_ == State::BLOCKED) { + TEST_SYNC_POINT("WBMStallInterface::BlockDB"); + state_cv_.Wait(); + } + } + + // Called from WriteBufferManager. This function changes the state_ + // to State::RUNNING indicating the stall is cleared and DB can proceed. + void Signal() override { + { + MutexLock lock(&state_mutex_); + state_ = State::RUNNING; + } + state_cv_.Signal(); + } + + private: + // Conditional variable and mutex to block and + // signal the DB during stalling process. + port::Mutex state_mutex_; + port::CondVar state_cv_; + // state represting whether DB is running or blocked because of stall by + // WriteBufferManager. + State state_; + }; + + static void TEST_ResetDbSessionIdGen(); + static std::string GenerateDbSessionId(Env* env); + protected: const std::string dbname_; + // TODO(peterd): unify with VersionSet::db_id_ std::string db_id_; + // db_session_id_ is an identifier that gets reset + // every time the DB is opened + std::string db_session_id_; std::unique_ptr versions_; // Flag to check whether we allocated and own the info log file bool own_info_log_; const DBOptions initial_db_options_; Env* const env_; - std::shared_ptr fs_; + std::shared_ptr io_tracer_; const ImmutableDBOptions immutable_db_options_; + FileSystemPtr fs_; MutableDBOptions mutable_db_options_; Statistics* stats_; std::unordered_map @@ -972,6 +1177,14 @@ ColumnFamilyHandleImpl* default_cf_handle_; InternalStats* default_cf_internal_stats_; + // table_cache_ provides its own synchronization + std::shared_ptr table_cache_; + + ErrorHandler error_handler_; + + // Unified interface for logging events + EventLogger event_logger_; + // only used for dynamically adjusting max_total_wal_size. it is a sum of // [write_buffer_size * max_write_buffer_number] over all column families uint64_t max_total_in_memory_state_; @@ -1002,12 +1215,22 @@ // Default: true const bool batch_per_txn_; + // Each flush or compaction gets its own job id. this counter makes sure + // they're unique + std::atomic next_job_id_; + + std::atomic shutting_down_; + // Except in DB::Open(), WriteOptionsFile can only be called when: // Persist options to options file. // If need_mutex_lock = false, the method will lock DB mutex. // If need_enter_write_thread = false, the method will enter write thread. Status WriteOptionsFile(bool need_mutex_lock, bool need_enter_write_thread); + Status CompactRangeInternal(const CompactRangeOptions& options, + ColumnFamilyHandle* column_family, + const Slice* begin, const Slice* end); + // The following two functions can only be called when: // 1. WriteThread::Writer::EnterUnbatched() is used. // 2. db_mutex is NOT held @@ -1036,6 +1259,8 @@ #ifndef ROCKSDB_LITE void NotifyOnExternalFileIngested( ColumnFamilyData* cfd, const ExternalSstFileIngestionJob& ingestion_job); + + Status FlushForGetLiveFiles(); #endif // !ROCKSDB_LITE void NewThreadStatusCfInfo(ColumnFamilyData* cfd) const; @@ -1113,12 +1338,33 @@ // skipped. 
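// Illustrative sketch, not part of the patch: WBMStallInterface above is the
// DB-side half of WriteBufferManager stalling. On the user side the stall
// behavior is opted into when constructing the manager; this assumes the
// bundled WriteBufferManager constructor takes an allow_stall flag as its
// third argument, which matches the stall interface introduced here.
#include <memory>
#include "rocksdb/options.h"
#include "rocksdb/write_buffer_manager.h"
static void WriteBufferManagerStallSketch(rocksdb::Options* options) {
  // Cap total memtable memory at 512 MiB across all DBs sharing the manager.
  // With allow_stall = true, writers block in WBMStallInterface::Block()
  // instead of failing once the cap is exceeded, and WriteBufferManager
  // calls Signal() to release them when flushes free memory.
  options->write_buffer_manager =
      std::make_shared<rocksdb::WriteBufferManager>(
          512 << 20 /* buffer_size */, nullptr /* cache */,
          true /* allow_stall */);
}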
virtual Status Recover( const std::vector& column_families, - bool read_only = false, bool error_if_log_file_exist = false, - bool error_if_data_exists_in_logs = false, + bool read_only = false, bool error_if_wal_file_exists = false, + bool error_if_data_exists_in_wals = false, uint64_t* recovered_seq = nullptr); virtual bool OwnTablesAndLogs() const { return true; } + // Set DB identity file, and write DB ID to manifest if necessary. + Status SetDBId(bool read_only); + + // REQUIRES: db mutex held when calling this function, but the db mutex can + // be released and re-acquired. Db mutex will be held when the function + // returns. + // After recovery, there may be SST files in db/cf paths that are + // not referenced in the MANIFEST (e.g. + // 1. It's best effort recovery; + // 2. The VersionEdits referencing the SST files are appended to + // MANIFEST, DB crashes when syncing the MANIFEST, the VersionEdits are + // still not synced to MANIFEST during recovery.) + // We delete these SST files. In the + // meantime, we find out the largest file number present in the paths, and + // bump up the version set's next_file_number_ to be 1 + largest_file_number. + Status DeleteUnreferencedSstFiles(); + + // SetDbSessionId() should be called in the constuctor DBImpl() + // to ensure that db_session_id_ gets updated every time the DB is opened + void SetDbSessionId(); + private: friend class DB; friend class ErrorHandler; @@ -1144,7 +1390,7 @@ friend class StatsHistoryTest_PersistentStatsCreateColumnFamilies_Test; #ifndef NDEBUG friend class DBTest2_ReadCallbackTest_Test; - friend class WriteCallbackTest_WriteWithCallbackTest_Test; + friend class WriteCallbackPTest_WriteWithCallbackTest_Test; friend class XFTransactionWriteHandler; friend class DBBlobIndexTest; friend class WriteUnpreparedTransactionTest_RecoveryTest_Test; @@ -1171,6 +1417,7 @@ struct LogFileNumberSize { explicit LogFileNumberSize(uint64_t _number) : number(_number) {} + LogFileNumberSize() {} void AddSize(uint64_t new_size) { size += new_size; } uint64_t number; uint64_t size = 0; @@ -1245,21 +1492,34 @@ // Information for a manual compaction struct ManualCompactionState { + ManualCompactionState(ColumnFamilyData* _cfd, int _input_level, + int _output_level, uint32_t _output_path_id, + bool _exclusive, bool _disallow_trivial_move, + std::atomic* _canceled) + : cfd(_cfd), + input_level(_input_level), + output_level(_output_level), + output_path_id(_output_path_id), + exclusive(_exclusive), + disallow_trivial_move(_disallow_trivial_move), + canceled(_canceled) {} + ColumnFamilyData* cfd; int input_level; int output_level; uint32_t output_path_id; Status status; - bool done; - bool in_progress; // compaction request being processed? - bool incomplete; // only part of requested range compacted + bool done = false; + bool in_progress = false; // compaction request being processed? 
+ bool incomplete = false; // only part of requested range compacted bool exclusive; // current behavior of only one manual bool disallow_trivial_move; // Force actual compaction to run - const InternalKey* begin; // nullptr means beginning of key range - const InternalKey* end; // nullptr means end of key range - InternalKey* manual_end; // how far we are compacting - InternalKey tmp_storage; // Used to keep track of compaction progress - InternalKey tmp_storage1; // Used to keep track of compaction progress + const InternalKey* begin = nullptr; // nullptr means beginning of key range + const InternalKey* end = nullptr; // nullptr means end of key range + InternalKey* manual_end = nullptr; // how far we are compacting + InternalKey tmp_storage; // Used to keep track of compaction progress + InternalKey tmp_storage1; // Used to keep track of compaction progress + std::atomic* canceled; // Compaction canceled by the user? }; struct PrepickedCompaction { // background compaction takes ownership of `compaction`. @@ -1276,6 +1536,7 @@ DBImpl* db; // background compaction takes ownership of `prepicked_compaction`. PrepickedCompaction* prepicked_compaction; + Env::Priority compaction_pri_; }; // Initialize the built-in column family for persistent stats. Depending on @@ -1293,7 +1554,7 @@ // Required: DB mutex held Status PersistentStatsProcessFormatVersion(); - Status ResumeImpl(); + Status ResumeImpl(DBRecoverContext context); void MaybeIgnoreError(Status* s) const; @@ -1332,7 +1593,7 @@ void ReleaseFileNumberFromPendingOutputs( std::unique_ptr::iterator>& v); - Status SyncClosedLogs(JobContext* job_context); + IOStatus SyncClosedLogs(JobContext* job_context); // Flush the in-memory write buffer to storage. Switches to a new // log-file/memtable and writes a new descriptor iff successful. Then @@ -1370,6 +1631,12 @@ Status WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, MemTable* mem, VersionEdit* edit); + // Get the size of a log file and, if truncate is true, truncate the + // log file to its actual size, thereby freeing preallocated space. + // Return success even if truncate fails + Status GetLogSizeAndMaybeTruncate(uint64_t wal_number, bool truncate, + LogFileNumberSize* log); + // Restore alive_log_files_ and total_log_size_ after recovery. // It needs to run only when there's no flush during recovery // (e.g. avoid_flush_during_recovery=true). May also trigger flush @@ -1380,6 +1647,10 @@ // `num_bytes` going through. Status DelayWrite(uint64_t num_bytes, const WriteOptions& write_options); + // Begin stalling of writes when memory usage increases beyond a certain + // threshold. + void WriteBufferManagerStallWrites(); + Status ThrottleLowPriWritesIfNeeded(const WriteOptions& write_options, WriteBatch* my_batch); @@ -1452,6 +1723,25 @@ } } + // TaskType is used to identify tasks in thread-pool, currently only + // differentiate manual compaction, which could be unscheduled from the + // thread-pool. + enum class TaskType : uint8_t { + kDefault = 0, + kManualCompaction = 1, + kCount = 2, + }; + + // Task tag is used to identity tasks in thread-pool, which is + // dbImpl obj address + type + inline void* GetTaskTag(TaskType type) { + return GetTaskTag(static_cast(type)); + } + + inline void* GetTaskTag(uint8_t type) { + return static_cast(static_cast(this)) + type; + } + // REQUIRES: mutex locked and in write thread. 
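// Standalone illustration, not part of the patch: GetTaskTag() above maps
// each (DBImpl object, task type) pair to a distinct pointer by offsetting
// the object's own address. The tag is never dereferenced, only compared, so
// it stays unique as long as TaskType has fewer values than sizeof(DBImpl);
// Env::UnSchedule(tag, priority) can then cancel exactly the queued manual
// compactions belonging to this DB instance. The same trick in isolation:
#include <cstdint>
struct TaggedScheduler {
  enum class TaskType : uint8_t { kDefault = 0, kManualCompaction = 1 };
  void* GetTaskTag(TaskType type) {
    // Pointer arithmetic on this object's address; each enum value yields a
    // distinct, stable tag for the object's lifetime.
    return static_cast<uint8_t*>(static_cast<void*>(this)) +
           static_cast<uint8_t>(type);
  }
};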
void AssignAtomicFlushSeq(const autovector& cfds); @@ -1459,7 +1749,7 @@ Status SwitchWAL(WriteContext* write_context); // REQUIRES: mutex locked and in write thread. - Status HandleWriteBufferFull(WriteContext* write_context); + Status HandleWriteBufferManagerFlush(WriteContext* write_context); // REQUIRES: mutex locked Status PreprocessWrite(const WriteOptions& write_options, bool* need_log_sync, @@ -1469,21 +1759,30 @@ WriteBatch* tmp_batch, size_t* write_with_wal, WriteBatch** to_be_cached_state); - Status WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer, - uint64_t* log_used, uint64_t* log_size); + IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer, + uint64_t* log_used, uint64_t* log_size, + bool with_db_mutex = false, bool with_log_mutex = false); + + IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group, + log::Writer* log_writer, uint64_t* log_used, + bool need_log_sync, bool need_log_dir_sync, + SequenceNumber sequence); + + IOStatus ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group, + uint64_t* log_used, + SequenceNumber* last_sequence, size_t seq_inc); - Status WriteToWAL(const WriteThread::WriteGroup& write_group, - log::Writer* log_writer, uint64_t* log_used, - bool need_log_sync, bool need_log_dir_sync, - SequenceNumber sequence); - - Status ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group, - uint64_t* log_used, SequenceNumber* last_sequence, - size_t seq_inc); + // Used by WriteImpl to update bg_error_ if paranoid check is enabled. + // Caller must hold mutex_. + void WriteStatusCheckOnLocked(const Status& status); // Used by WriteImpl to update bg_error_ if paranoid check is enabled. void WriteStatusCheck(const Status& status); + // Used by WriteImpl to update bg_error_ when IO error happens, e.g., write + // WAL, sync WAL fails, if paranoid check is enabled. + void IOStatusCheck(const IOStatus& status); + // Used by WriteImpl to update bg_error_ in case of memtable insert error. void MemTableInsertStatusCheck(const Status& memtable_insert_status); @@ -1517,7 +1816,7 @@ // specified value, this flush request is considered to have completed its // work of flushing this column family. After completing the work for all // column families in this request, this flush is considered complete. - typedef std::vector> FlushRequest; + using FlushRequest = std::vector>; void GenerateFlushRequest(const autovector& cfds, FlushRequest* req); @@ -1558,18 +1857,12 @@ LogBuffer* log_buffer); // Schedule background tasks - void StartTimedTasks(); + void StartPeriodicWorkScheduler(); void PrintStatistics(); size_t EstimateInMemoryStatsHistorySize() const; - // persist stats to column family "_persistent_stats" - void PersistStats(); - - // dump rocksdb.stats to LOG - void DumpStats(); - // Return the minimum empty level that could hold the total data in the // input level. Return the input level, if such level could not be found. int FindMinimumEmptyLevelFitting(ColumnFamilyData* cfd, @@ -1591,14 +1884,16 @@ std::unique_ptr* token, LogBuffer* log_buffer); // helper function to call after some of the logs_ were synced - void MarkLogsSynced(uint64_t up_to, bool synced_dir, const Status& status); + Status MarkLogsSynced(uint64_t up_to, bool synced_dir); + // WALs with log number up to up_to are not synced successfully. 
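// Illustrative sketch, not part of the patch: several private methods above
// (SyncClosedLogs(), WriteToWAL(), ConcurrentWriteToWAL(), CreateWAL()) now
// return IOStatus rather than Status, so WAL failures keep their I/O context
// until the error handler classifies them. In the bundled tree IOStatus
// extends Status, so it narrows implicitly at the boundary:
#include "rocksdb/io_status.h"
#include "rocksdb/status.h"
static rocksdb::IOStatus SyncWalSketch(bool ok) {
  return ok ? rocksdb::IOStatus::OK()
            : rocksdb::IOStatus::IOError("fsync failed");
}
static rocksdb::Status CallerSketch() {
  rocksdb::IOStatus io_s = SyncWalSketch(false);
  // Mirrors `s = log_io_s;` in db_impl_compaction_flush.cc below: the richer
  // IOStatus is inspected first, then assigned into a plain Status.
  rocksdb::Status s = io_s;
  return s;
}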
+ void MarkLogsNotSynced(uint64_t up_to); SnapshotImpl* GetSnapshotImpl(bool is_write_conflict_boundary, bool lock = true); uint64_t GetMaxTotalWalSize() const; - Directory* GetDataDir(ColumnFamilyData* cfd, size_t path_id) const; + FSDirectory* GetDataDir(ColumnFamilyData* cfd, size_t path_id) const; Status CloseHelper(); @@ -1648,8 +1943,8 @@ size_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; Env::WriteLifeTimeHint CalculateWALWriteHint() { return Env::WLTH_SHORT; } - Status CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, - size_t preallocate_block_size, log::Writer** new_log); + IOStatus CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, + size_t preallocate_block_size, log::Writer** new_log); // Validate self-consistency of DB options static Status ValidateOptions(const DBOptions& db_options); @@ -1727,14 +2022,15 @@ // to have acquired the SuperVersion and pass in a snapshot sequence number // in order to construct the LookupKeys. The start_key and num_keys specify // the range of keys in the sorted_keys vector for a single column family. - void MultiGetImpl( + Status MultiGetImpl( const ReadOptions& read_options, size_t start_key, size_t num_keys, autovector* sorted_keys, - SuperVersion* sv, SequenceNumber snap_seqnum, ReadCallback* callback, - bool* is_blob_index); + SuperVersion* sv, SequenceNumber snap_seqnum, ReadCallback* callback); - // table_cache_ provides its own synchronization - std::shared_ptr table_cache_; + Status DisableFileDeletionsWithLock(); + + Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, + std::string ts_low); // Lock over the persistent DB state. Non-nullptr iff successfully acquired. FileLock* db_lock_; @@ -1749,8 +2045,13 @@ // mutex_, the order should be first mutex_ and then log_write_mutex_. InstrumentedMutex log_write_mutex_; - std::atomic shutting_down_; - std::atomic manual_compaction_paused_; + // If zero, manual compactions are allowed to proceed. If non-zero, manual + // compactions may still be running, but will quickly fail with + // `Status::Incomplete`. The value indicates how many threads have paused + // manual compactions. It is accessed in read mode outside the DB mutex in + // compaction code paths. + std::atomic manual_compaction_paused_; + // This condition variable is signaled on these conditions: // * whenever bg_compaction_scheduled_ goes down to 0 // * if AnyManualCompaction, whenever a compaction finishes, even if it hasn't @@ -1778,7 +2079,7 @@ // accessed from the same write_thread_ without any locks. With // two_write_queues writes, where it can be updated in different threads, // read and writes are protected by log_write_mutex_ instead. This is to avoid - // expesnive mutex_ lock during WAL write, which update log_empty_. + // expensive mutex_ lock during WAL write, which update log_empty_. bool log_empty_; ColumnFamilyHandleImpl* persist_stats_cf_handle_; @@ -1786,12 +2087,15 @@ bool persistent_stats_cfd_exists_ = true; // Without two_write_queues, read and writes to alive_log_files_ are - // protected by mutex_. However since back() is never popped, and push_back() - // is done only from write_thread_, the same thread can access the item - // reffered by back() without mutex_. With two_write_queues_, writes + // protected by mutex_. With two_write_queues_, writes // are protected by locking both mutex_ and log_write_mutex_, and reads must // be under either mutex_ or log_write_mutex_. 
std::deque alive_log_files_; + // Caching the result of `alive_log_files_.back()` so that we do not have to + // call `alive_log_files_.back()` in the write thread (WriteToWAL()) which + // requires locking db mutex if log_mutex_ is not already held in + // two-write-queues mode. + std::deque::reverse_iterator alive_log_files_tail_; // Log files that aren't fully synced, and the current log file. // Synchronization: // - push_back() is done from write_thread_ with locked mutex_ and @@ -1895,7 +2199,7 @@ std::unordered_map purge_files_; // A vector to store the file numbers that have been assigned to certain - // JobContext. Current implementation tracks ssts only. + // JobContext. Current implementation tracks table and blob files only. std::unordered_set files_grabbed_for_purge_; // A queue to store log writers to close @@ -1952,10 +2256,6 @@ // Number of threads intending to write to memtable std::atomic pending_memtable_writes_ = {}; - // Each flush or compaction gets its own job id. this counter makes sure - // they're unique - std::atomic next_job_id_; - // A flag indicating whether the current rocksdb database has any // data that is not yet persisted into either WAL or SST file. // Used when disableWAL is true. @@ -1984,9 +2284,6 @@ WalManager wal_manager_; #endif // ROCKSDB_LITE - // Unified interface for logging events - EventLogger event_logger_; - // A value of > 0 temporarily disables scheduling of background work int bg_work_paused_; @@ -2013,15 +2310,15 @@ // Only to be set during initialization std::unique_ptr recoverable_state_pre_release_callback_; - // handle for scheduling stats dumping at fixed intervals - // REQUIRES: mutex locked - std::unique_ptr thread_dump_stats_; - - // handle for scheduling stats snapshoting at fixed intervals - // REQUIRES: mutex locked - std::unique_ptr thread_persist_stats_; +#ifndef ROCKSDB_LITE + // Scheduler to run DumpStats(), PersistStats(), and FlushInfoLog(). + // Currently, it always use a global instance from + // PeriodicWorkScheduler::Default(). Only in unittest, it can be overrided by + // PeriodicWorkTestScheduler. + PeriodicWorkScheduler* periodic_work_scheduler_; +#endif - // When set, we use a separate queue for writes that dont write to memtable. + // When set, we use a separate queue for writes that don't write to memtable. // In 2PC these are the writes at Prepare phase. const bool two_write_queues_; const bool manual_wal_flush_; @@ -2053,8 +2350,10 @@ // Flag to check whether Close() has been called on this DB bool closed_; - - ErrorHandler error_handler_; + // save the closing status, for re-calling the close() + Status closing_status_; + // mutex for DB::Close() + InstrumentedMutex closing_mutex_; // Conditional variable to coordinate installation of atomic flush results. // With atomic flush, each bg thread installs the result of flushing multiple @@ -2068,11 +2367,22 @@ InstrumentedCondVar atomic_flush_install_cv_; bool wal_in_db_path_; + + BlobFileCompletionCallback blob_callback_; + + // Pointer to WriteBufferManager stalling interface. + std::unique_ptr wbm_stall_; + + // Indicate if deprecation warning message is logged before. Will be removed + // soon with the deprecated feature. 
+ std::atomic_bool iter_start_seqnum_deprecation_warned_{false}; }; -extern Options SanitizeOptions(const std::string& db, const Options& src); +extern Options SanitizeOptions(const std::string& db, const Options& src, + bool read_only = false); -extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src); +extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src, + bool read_only = false); extern CompressionType GetCompressionFlush( const ImmutableCFOptions& ioptions, @@ -2084,18 +2394,37 @@ // `memtables_to_flush`) will be flushed and thus will not depend on any WAL // file. // The function is only applicable to 2pc mode. -extern uint64_t PrecomputeMinLogNumberToKeep( +extern uint64_t PrecomputeMinLogNumberToKeep2PC( VersionSet* vset, const ColumnFamilyData& cfd_to_flush, - autovector edit_list, + const autovector& edit_list, const autovector& memtables_to_flush, LogsWithPrepTracker* prep_tracker); +// For atomic flush. +extern uint64_t PrecomputeMinLogNumberToKeep2PC( + VersionSet* vset, const autovector& cfds_to_flush, + const autovector>& edit_lists, + const autovector*>& memtables_to_flush, + LogsWithPrepTracker* prep_tracker); + +// In non-2PC mode, WALs with log number < the returned number can be +// deleted after the cfd_to_flush column family is flushed successfully. +extern uint64_t PrecomputeMinLogNumberToKeepNon2PC( + VersionSet* vset, const ColumnFamilyData& cfd_to_flush, + const autovector& edit_list); +// For atomic flush. +extern uint64_t PrecomputeMinLogNumberToKeepNon2PC( + VersionSet* vset, const autovector& cfds_to_flush, + const autovector>& edit_lists); // `cfd_to_flush` is the column family whose memtable will be flushed and thus // will not depend on any WAL file. nullptr means no memtable is being flushed. // The function is only applicable to 2pc mode. extern uint64_t FindMinPrepLogReferencedByMemTable( - VersionSet* vset, const ColumnFamilyData* cfd_to_flush, - const autovector& memtables_to_flush); + VersionSet* vset, const autovector& memtables_to_flush); +// For atomic flush. +extern uint64_t FindMinPrepLogReferencedByMemTable( + VersionSet* vset, + const autovector*>& memtables_to_flush); // Fix user-supplied options to be reasonable template diff -Nru mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc --- mariadb-10.11.11/storage/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc 2025-01-30 11:01:26.000000000 +0000 +++ mariadb-10.11.13/storage/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc 2025-05-19 16:14:27.000000000 +0000 @@ -6,14 +6,15 @@ // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "db/db_impl/db_impl.h" - #include +#include #include "db/builder.h" +#include "db/db_impl/db_impl.h" #include "db/error_handler.h" #include "db/event_helpers.h" #include "file/sst_file_manager_impl.h" +#include "logging/logging.h" #include "monitoring/iostats_context_imp.h" #include "monitoring/perf_context_imp.h" #include "monitoring/thread_status_updater.h" @@ -36,8 +37,10 @@ // Pass the current bg_error_ to SFM so it can decide what checks to // perform. 
If this DB instance hasn't seen any error yet, the SFM can be // optimistic and not do disk space checks - enough_room = - sfm->EnoughRoomForCompaction(cfd, inputs, error_handler_.GetBGError()); + Status bg_error = error_handler_.GetBGError(); + enough_room = sfm->EnoughRoomForCompaction(cfd, inputs, bg_error); + bg_error.PermitUncheckedError(); // bg_error is just a copy of the Status + // from the error_handler_ if (enough_room) { *sfm_reserved_compact_space = true; } @@ -79,7 +82,7 @@ return false; } -Status DBImpl::SyncClosedLogs(JobContext* job_context) { +IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) { TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start"); mutex_.AssertHeld(); autovector logs_to_sync; @@ -96,42 +99,52 @@ logs_to_sync.push_back(log.writer); } - Status s; + IOStatus io_s; if (!logs_to_sync.empty()) { mutex_.Unlock(); + assert(job_context); + for (log::Writer* log : logs_to_sync) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "[JOB %d] Syncing log #%" PRIu64, job_context->job_id, log->get_log_number()); - s = log->file()->Sync(immutable_db_options_.use_fsync); - if (!s.ok()) { + io_s = log->file()->Sync(immutable_db_options_.use_fsync); + if (!io_s.ok()) { break; } if (immutable_db_options_.recycle_log_file_num > 0) { - s = log->Close(); - if (!s.ok()) { + io_s = log->Close(); + if (!io_s.ok()) { break; } } } - if (s.ok()) { - s = directories_.GetWalDir()->Fsync(); + if (io_s.ok()) { + io_s = directories_.GetWalDir()->FsyncWithDirOptions( + IOOptions(), nullptr, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced)); } + TEST_SYNC_POINT_CALLBACK("DBImpl::SyncClosedLogs:BeforeReLock", + /*arg=*/nullptr); mutex_.Lock(); // "number <= current_log_number - 1" is equivalent to // "number < current_log_number". - MarkLogsSynced(current_log_number - 1, true, s); - if (!s.ok()) { - error_handler_.SetBGError(s, BackgroundErrorReason::kFlush); + if (io_s.ok()) { + io_s = status_to_io_status(MarkLogsSynced(current_log_number - 1, true)); + } else { + MarkLogsNotSynced(current_log_number - 1); + } + if (!io_s.ok()) { TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed"); - return s; + return io_s; } } - return s; + TEST_SYNC_POINT("DBImpl::SyncClosedLogs:end"); + return io_s; } Status DBImpl::FlushMemTableToOutputFile( @@ -143,44 +156,98 @@ SnapshotChecker* snapshot_checker, LogBuffer* log_buffer, Env::Priority thread_pri) { mutex_.AssertHeld(); + assert(cfd); + assert(cfd->imm()); assert(cfd->imm()->NumNotFlushed() != 0); assert(cfd->imm()->IsFlushPending()); + assert(versions_); + assert(versions_->GetColumnFamilySet()); + // If there are more than one column families, we need to make sure that + // all the log files except the most recent one are synced. Otherwise if + // the host crashes after flushing and before WAL is persistent, the + // flushed SST may contain data from write batches whose updates to + // other (unflushed) column families are missing. + const bool needs_to_sync_closed_wals = + logfile_number_ > 0 && + versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1; + + // If needs_to_sync_closed_wals is true, we need to record the current + // maximum memtable ID of this column family so that a later PickMemtables() + // call will not pick memtables whose IDs are higher. This is due to the fact + // that SyncClosedLogs() may release the db mutex, and memtable switch can + // happen for this column family in the meantime. The newly created memtables + // have their data backed by unsynced WALs, thus they cannot be included in + // this flush job. 
+ // Another reason why we must record the current maximum memtable ID of this + // column family: SyncClosedLogs() may release db mutex, thus it's possible + // for the application to continue to insert into memtables increasing db's + // sequence number. The application may take a snapshot, but this snapshot is + // not included in `snapshot_seqs` which will be passed to flush job because + // `snapshot_seqs` has already been computed before this function starts. + // Recording the max memtable ID ensures that the flush job does not flush + // a memtable without knowing such snapshot(s). + uint64_t max_memtable_id = needs_to_sync_closed_wals + ? cfd->imm()->GetLatestMemTableID() + : port::kMaxUint64; + + // If needs_to_sync_closed_wals is false, then the flush job will pick ALL + // existing memtables of the column family when PickMemTable() is called + // later. Although we won't call SyncClosedLogs() in this case, we may still + // call the callbacks of the listeners, i.e. NotifyOnFlushBegin() which also + // releases and re-acquires the db mutex. In the meantime, the application + // can still insert into the memtables and increase the db's sequence number. + // The application can take a snapshot, hoping that the latest visible state + // to this snapshot is preserved. This is hard to guarantee since the db + // mutex is not held. This newly-created snapshot is not included in + // `snapshot_seqs` and the flush job is unaware of its presence. + // Consequently, the flush job may drop certain keys when generating the L0, + // causing incorrect data to be returned for snapshot read using this + // snapshot. + // To address this, we make sure NotifyOnFlushBegin() executes after memtable + // picking so that no new snapshot can be taken between the two functions. FlushJob flush_job( - dbname_, cfd, immutable_db_options_, mutable_cf_options, - nullptr /* memtable_id */, file_options_for_compaction_, versions_.get(), - &mutex_, &shutting_down_, snapshot_seqs, earliest_write_conflict_snapshot, - snapshot_checker, job_context, log_buffer, directories_.GetDbDir(), - GetDataDir(cfd, 0U), + dbname_, cfd, immutable_db_options_, mutable_cf_options, max_memtable_id, + file_options_for_compaction_, versions_.get(), &mutex_, &shutting_down_, + snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, + job_context, log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U), GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_, &event_logger_, mutable_cf_options.report_bg_io_stats, - true /* sync_output_directory */, true /* write_manifest */, thread_pri); - + true /* sync_output_directory */, true /* write_manifest */, thread_pri, + io_tracer_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow(), + &blob_callback_); FileMetaData file_meta; + Status s; + bool need_cancel = false; + IOStatus log_io_s = IOStatus::OK(); + if (needs_to_sync_closed_wals) { + // SyncClosedLogs() may unlock and re-lock the db_mutex. + log_io_s = SyncClosedLogs(job_context); + if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() && + !log_io_s.IsColumnFamilyDropped()) { + error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush); + } + } else { + TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip"); + } + s = log_io_s; + + // If the log sync failed, we do not need to pick memtable. Otherwise, + // num_flush_not_started_ needs to be rolled back.
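// Illustrative sketch, not part of the patch: the comment block above is
// about keeping flushed SSTs consistent with other column families' WALs.
// The user-facing relative of this machinery is atomic flush: with
// options.atomic_flush = true at open time, flushing several column families
// installs their L0 results together or not at all.
#include <vector>
#include "rocksdb/db.h"
static void AtomicFlushSketch(rocksdb::DB* db,
                              rocksdb::ColumnFamilyHandle* cf_a,
                              rocksdb::ColumnFamilyHandle* cf_b) {
  rocksdb::FlushOptions flush_opts;
  flush_opts.wait = true;  // block until the memtables are persisted
  // Overload taking a vector of handles; flushed atomically when the DB was
  // opened with atomic_flush = true.
  rocksdb::Status s = db->Flush(flush_opts, {cf_a, cf_b});
  (void)s;  // a sketch; real code would check s
}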
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"); - flush_job.PickMemTable(); - TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:AfterPickMemtables"); + if (s.ok()) { + flush_job.PickMemTable(); + need_cancel = true; + } + TEST_SYNC_POINT_CALLBACK( + "DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", &flush_job); #ifndef ROCKSDB_LITE // may temporarily unlock and lock the mutex. NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id); #endif // ROCKSDB_LITE - Status s; - if (logfile_number_ > 0 && - versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1) { - // If there are more than one column families, we need to make sure that - // all the log files except the most recent one are synced. Otherwise if - // the host crashes after flushing and before WAL is persistent, the - // flushed SST may contain data from write batches whose updates to - // other column families are missing. - // SyncClosedLogs() may unlock and re-lock the db_mutex. - s = SyncClosedLogs(job_context); - } else { - TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip"); - } - + bool switched_to_mempurge = false; // Within flush_job.Run, rocksdb may call event listener to notify // file creation and deletion. // @@ -188,10 +255,19 @@ // and EventListener callback will be called when the db_mutex // is unlocked by the current thread. if (s.ok()) { - s = flush_job.Run(&logs_with_prep_tracker_, &file_meta); - } else { + s = flush_job.Run(&logs_with_prep_tracker_, &file_meta, + &switched_to_mempurge); + need_cancel = false; + } + + if (!s.ok() && need_cancel) { flush_job.Cancel(); } + IOStatus io_s = IOStatus::OK(); + io_s = flush_job.io_status(); + if (s.ok()) { + s = io_s; + } if (s.ok()) { InstallSuperVersionAndScheduleWork(cfd, superversion_context, @@ -199,17 +275,66 @@ if (made_progress) { *made_progress = true; } + + const std::string& column_family_name = cfd->GetName(); + + Version* const current = cfd->current(); + assert(current); + + const VersionStorageInfo* const storage_info = current->storage_info(); + assert(storage_info); + VersionStorageInfo::LevelSummaryStorage tmp; ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n", - cfd->GetName().c_str(), - cfd->current()->storage_info()->LevelSummary(&tmp)); + column_family_name.c_str(), + storage_info->LevelSummary(&tmp)); + + const auto& blob_files = storage_info->GetBlobFiles(); + if (!blob_files.empty()) { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 + "\n", + column_family_name.c_str(), blob_files.begin()->first, + blob_files.rbegin()->first); + } } if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) { - Status new_bg_error = s; - error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); - } - if (s.ok()) { + if (!io_s.ok() && !io_s.IsShutdownInProgress() && + !io_s.IsColumnFamilyDropped()) { + assert(log_io_s.ok()); + // Error while writing to MANIFEST. + // In fact, versions_->io_status() can also be the result of renaming + // CURRENT file. With current code, it's just difficult to tell. So just + // be pessimistic and try write to a new MANIFEST. + // TODO: distinguish between MANIFEST write and CURRENT renaming + if (!versions_->io_status().ok()) { + // If WAL sync is successful (either WAL size is 0 or there is no IO + // error), all the Manifest write will be map to soft error. + // TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor is + // needed. 
+ error_handler_.SetBGError(io_s, + BackgroundErrorReason::kManifestWriteNoWAL); + } else { + // If WAL sync is successful (either WAL size is 0 or there is no IO + // error), all the other SST file write errors will be set as + // kFlushNoWAL. + error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL); + } + } else { + if (log_io_s.ok()) { + Status new_bg_error = s; + error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush); + } + } + } else { + // If we got here, then we decided not to care about the io_s status (either + // from never needing it or from ignoring the flush job status). + io_s.PermitUncheckedError(); + } + // If flush ran smoothly and no mempurge happened, + // install new SST file path. + if (s.ok() && (!switched_to_mempurge)) { #ifndef ROCKSDB_LITE // may temporarily unlock and lock the mutex. NotifyOnFlushCompleted(cfd, mutable_cf_options, @@ -220,7 +345,10 @@ // Notify sst_file_manager that a new file was added std::string file_path = MakeTableFileName( cfd->ioptions()->cf_paths[0].path, file_meta.fd.GetNumber()); - sfm->OnAddFile(file_path); + // TODO (PR7798). We should only add the file to the FileManager if it + // exists. Otherwise, some tests may fail. Ignore the error in the + // interim.
- break; - } - } - } - return status; + const auto& bg_flush_arg = bg_flush_args[0]; + ColumnFamilyData* cfd = bg_flush_arg.cfd_; + MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions(); + SuperVersionContext* superversion_context = + bg_flush_arg.superversion_context_; + Status s = FlushMemTableToOutputFile( + cfd, mutable_cf_options, made_progress, job_context, superversion_context, + snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker, + log_buffer, thread_pri); + return s; } /* @@ -301,7 +421,7 @@ GetSnapshotContext(job_context, &snapshot_seqs, &earliest_write_conflict_snapshot, &snapshot_checker); - autovector<Directory*> distinct_output_dirs; + autovector<FSDirectory*> distinct_output_dirs; autovector<std::string> distinct_output_dir_paths; std::vector<std::unique_ptr<FlushJob>> jobs; std::vector<MutableCFOptions> all_mutable_cf_options; @@ -309,7 +429,7 @@ all_mutable_cf_options.reserve(num_cfs); for (int i = 0; i < num_cfs; ++i) { auto cfd = cfds[i]; - Directory* data_dir = GetDataDir(cfd, 0U); + FSDirectory* data_dir = GetDataDir(cfd, 0U); const std::string& curr_path = cfd->ioptions()->cf_paths[0].path; // Add to distinct output directories if eligible. Use linear search. Since @@ -329,7 +449,7 @@ all_mutable_cf_options.emplace_back(*cfd->GetLatestMutableCFOptions()); const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.back(); - const uint64_t* max_memtable_id = &(bg_flush_args[i].max_memtable_id_); + uint64_t max_memtable_id = bg_flush_args[i].max_memtable_id_; jobs.emplace_back(new FlushJob( dbname_, cfd, immutable_db_options_, mutable_cf_options, max_memtable_id, file_options_for_compaction_, versions_.get(), &mutex_, @@ -338,12 +458,16 @@ data_dir, GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_, &event_logger_, mutable_cf_options.report_bg_io_stats, false /* sync_output_directory */, false /* write_manifest */, - thread_pri)); - jobs.back()->PickMemTable(); + thread_pri, io_tracer_, db_id_, db_session_id_, + cfd->GetFullHistoryTsLow(), &blob_callback_)); } std::vector<FileMetaData> file_meta(num_cfs); + // Use of deque<bool> because vector<bool> + // is specific and doesn't allow &v[i]. + std::deque<bool> switched_to_mempurge(num_cfs, false); Status s; + IOStatus log_io_s = IOStatus::OK(); assert(num_cfs == static_cast<int>(jobs.size())); #ifndef ROCKSDB_LITE @@ -358,23 +482,48 @@ if (logfile_number_ > 0) { // TODO (yanqin) investigate whether we should sync the closed logs for // single column family case. - s = SyncClosedLogs(job_context); + log_io_s = SyncClosedLogs(job_context); + if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() && + !log_io_s.IsColumnFamilyDropped()) { + if (total_log_size_ > 0) { + error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush); + } else { + // If the WAL is empty, we use different error reason + error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlushNoWAL); + } + } } + s = log_io_s; // exec_status stores the execution status of flush_jobs as // <bool /* executed */, Status /* status code */> autovector<std::pair<bool, Status>> exec_status; + autovector<IOStatus> io_status; + std::vector<bool> pick_status; for (int i = 0; i != num_cfs; ++i) { // Initially all jobs are not executed, with status OK. exec_status.emplace_back(false, Status::OK()); + io_status.emplace_back(IOStatus::OK()); + pick_status.push_back(false); + } + + if (s.ok()) { + for (int i = 0; i != num_cfs; ++i) { + jobs[i]->PickMemTable(); + pick_status[i] = true; + } } if (s.ok()) { + assert(switched_to_mempurge.size() == + static_cast<size_t>(num_cfs)); // TODO (yanqin): parallelize jobs with threads.
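// Illustrative sketch, not part of the patch: the OnAddFile() calls in this
// file feed an optional SstFileManager, and PermitUncheckedError() marks
// their result as deliberately ignored (per the TODO referencing PR7798).
// Wiring up the manager with a space cap uses the long-standing public API;
// the 64 GiB figure is arbitrary:
#include <memory>
#include "rocksdb/options.h"
#include "rocksdb/sst_file_manager.h"
static void SstFileManagerSketch(rocksdb::Options* options) {
  std::shared_ptr<rocksdb::SstFileManager> sfm(
      rocksdb::NewSstFileManager(rocksdb::Env::Default()));
  // Once tracked SST space exceeds the cap, subsequent flushes fail with
  // Status::SpaceLimit, matching the IsMaxAllowedSpaceReached() checks here.
  sfm->SetMaxAllowedSpaceUsage(64ull << 30);
  options->sst_file_manager = sfm;
}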
for (int i = 1; i != num_cfs; ++i) { exec_status[i].second = - jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i]); + jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i], + &(switched_to_mempurge.at(i))); exec_status[i].first = true; + io_status[i] = jobs[i]->io_status(); } if (num_cfs > 1) { TEST_SYNC_POINT( @@ -384,9 +533,11 @@ } assert(exec_status.size() > 0); assert(!file_meta.empty()); - exec_status[0].second = - jobs[0]->Run(&logs_with_prep_tracker_, &file_meta[0]); + exec_status[0].second = jobs[0]->Run( + &logs_with_prep_tracker_, file_meta.data() /* &file_meta[0] */, + switched_to_mempurge.empty() ? nullptr : &(switched_to_mempurge.at(0))); exec_status[0].first = true; + io_status[0] = jobs[0]->io_status(); Status error_status; for (const auto& e : exec_status) { @@ -405,6 +556,21 @@ s = error_status.ok() ? s : error_status; } + IOStatus io_s = IOStatus::OK(); + if (io_s.ok()) { + IOStatus io_error = IOStatus::OK(); + for (int i = 0; i != static_cast(io_status.size()); i++) { + if (!io_status[i].ok() && !io_status[i].IsShutdownInProgress() && + !io_status[i].IsColumnFamilyDropped()) { + io_error = io_status[i]; + } + } + io_s = io_error; + if (s.ok() && !io_s.ok()) { + s = io_s; + } + } + if (s.IsColumnFamilyDropped()) { s = Status::OK(); } @@ -413,7 +579,9 @@ // Sync on all distinct output directories. for (auto dir : distinct_output_dirs) { if (dir != nullptr) { - Status error_status = dir->Fsync(); + Status error_status = dir->FsyncWithDirOptions( + IOOptions(), nullptr, + DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced)); if (!error_status.ok()) { s = error_status; break; @@ -426,12 +594,12 @@ // Have to cancel the flush jobs that have NOT executed because we need to // unref the versions. for (int i = 0; i != num_cfs; ++i) { - if (!exec_status[i].first) { + if (pick_status[i] && !exec_status[i].first) { jobs[i]->Cancel(); } } for (int i = 0; i != num_cfs; ++i) { - if (exec_status[i].first && exec_status[i].second.ok()) { + if (exec_status[i].second.ok() && exec_status[i].first) { auto& mems = jobs[i]->GetMemTables(); cfds[i]->imm()->RollbackMemtableFlush(mems, file_meta[i].fd.GetNumber()); @@ -440,7 +608,15 @@ } if (s.ok()) { - auto wait_to_install_func = [&]() { + const auto wait_to_install_func = + [&]() -> std::pair { + if (!versions_->io_status().ok()) { + // Something went wrong elsewhere, we cannot count on waiting for our + // turn to write/sync to MANIFEST or CURRENT. Just return. + return std::make_pair(versions_->io_status(), false); + } else if (shutting_down_.load(std::memory_order_acquire)) { + return std::make_pair(Status::ShutdownInProgress(), false); + } bool ready = true; for (size_t i = 0; i != cfds.size(); ++i) { const auto& mems = jobs[i]->GetMemTables(); @@ -464,18 +640,40 @@ break; } } - return ready; + return std::make_pair(Status::OK(), !ready); }; bool resuming_from_bg_err = error_handler_.IsDBStopped(); - while ((!error_handler_.IsDBStopped() || - error_handler_.GetRecoveryError().ok()) && - !wait_to_install_func()) { + while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) { + std::pair res = wait_to_install_func(); + + TEST_SYNC_POINT_CALLBACK( + "DBImpl::AtomicFlushMemTablesToOutputFiles:WaitToCommit", &res); + + if (!res.first.ok()) { + s = res.first; + break; + } else if (!res.second) { + break; + } atomic_flush_install_cv_.Wait(); + + resuming_from_bg_err = error_handler_.IsDBStopped(); } - s = resuming_from_bg_err ? 
error_handler_.GetRecoveryError() - : error_handler_.GetBGError(); + if (!resuming_from_bg_err) { + // If not resuming from bg err, then we determine future action based on + // whether we hit background error. + if (s.ok()) { + s = error_handler_.GetBGError(); + } + } else if (s.ok()) { + // If resuming from bg err, we still rely on wait_to_install_func()'s + // result to determine future action. If wait_to_install_func() returns + // non-ok already, then we should not proceed to flush result + // installation. + s = error_handler_.GetRecoveryError(); + } } if (s.ok()) { @@ -483,6 +681,8 @@ autovector*> mems_list; autovector mutable_cf_options_list; autovector tmp_file_meta; + autovector>*> + committed_flush_jobs_info; for (int i = 0; i != num_cfs; ++i) { const auto& mems = jobs[i]->GetMemTables(); if (!cfds[i]->IsDropped() && !mems.empty()) { @@ -490,29 +690,54 @@ mems_list.emplace_back(&mems); mutable_cf_options_list.emplace_back(&all_mutable_cf_options[i]); tmp_file_meta.emplace_back(&file_meta[i]); +#ifndef ROCKSDB_LITE + committed_flush_jobs_info.emplace_back( + jobs[i]->GetCommittedFlushJobsInfo()); +#endif //! ROCKSDB_LITE } } s = InstallMemtableAtomicFlushResults( nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list, - versions_.get(), &mutex_, tmp_file_meta, - &job_context->memtables_to_free, directories_.GetDbDir(), log_buffer); + versions_.get(), &logs_with_prep_tracker_, &mutex_, tmp_file_meta, + committed_flush_jobs_info, &job_context->memtables_to_free, + directories_.GetDbDir(), log_buffer); } if (s.ok()) { assert(num_cfs == static_cast(job_context->superversion_contexts.size())); for (int i = 0; i != num_cfs; ++i) { + assert(cfds[i]); + if (cfds[i]->IsDropped()) { continue; } InstallSuperVersionAndScheduleWork(cfds[i], &job_context->superversion_contexts[i], all_mutable_cf_options[i]); + + const std::string& column_family_name = cfds[i]->GetName(); + + Version* const current = cfds[i]->current(); + assert(current); + + const VersionStorageInfo* const storage_info = current->storage_info(); + assert(storage_info); + VersionStorageInfo::LevelSummaryStorage tmp; ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n", - cfds[i]->GetName().c_str(), - cfds[i]->current()->storage_info()->LevelSummary(&tmp)); + column_family_name.c_str(), + storage_info->LevelSummary(&tmp)); + + const auto& blob_files = storage_info->GetBlobFiles(); + if (!blob_files.empty()) { + ROCKS_LOG_BUFFER(log_buffer, + "[%s] Blob file summary: head=%" PRIu64 + ", tail=%" PRIu64 "\n", + column_family_name.c_str(), blob_files.begin()->first, + blob_files.rbegin()->first); + } } if (made_progress) { *made_progress = true; @@ -521,7 +746,12 @@ auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); assert(all_mutable_cf_options.size() == static_cast(num_cfs)); - for (int i = 0; i != num_cfs; ++i) { + for (int i = 0; s.ok() && i != num_cfs; ++i) { + // If mempurge happened instead of Flush, + // no NotifyOnFlushCompleted call (no SST file created). + if (switched_to_mempurge[i]) { + continue; + } if (cfds[i]->IsDropped()) { continue; } @@ -530,7 +760,10 @@ if (sfm) { std::string file_path = MakeTableFileName( cfds[i]->ioptions()->cf_paths[0].path, file_meta[i].fd.GetNumber()); - sfm->OnAddFile(file_path); + // TODO (PR7798). We should only add the file to the FileManager if it + // exists. Otherwise, some tests may fail. Ignore the error in the + // interim. 
+        sfm->OnAddFile(file_path).PermitUncheckedError();
         if (sfm->IsMaxAllowedSpaceReached() &&
             error_handler_.GetBGError().ok()) {
           Status new_bg_error =
@@ -543,9 +776,35 @@
 #endif  // ROCKSDB_LITE
   }
 
-  if (!s.ok() && !s.IsShutdownInProgress()) {
-    Status new_bg_error = s;
-    error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
+  // Need to undo atomic flush if something went wrong, i.e. s is not OK and
+  // it is not because of CF drop.
+  if (!s.ok() && !s.IsColumnFamilyDropped()) {
+    if (!io_s.ok() && !io_s.IsColumnFamilyDropped()) {
+      assert(log_io_s.ok());
+      // Error while writing to MANIFEST.
+      // In fact, versions_->io_status() can also be the result of renaming
+      // CURRENT file. With current code, it's just difficult to tell. So just
+      // be pessimistic and try write to a new MANIFEST.
+      // TODO: distinguish between MANIFEST write and CURRENT renaming
+      if (!versions_->io_status().ok()) {
+        // If WAL sync is successful (either WAL size is 0 or there is no IO
+        // error), all the Manifest write will be map to soft error.
+        // TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor
+        // is needed.
+        error_handler_.SetBGError(io_s,
+                                  BackgroundErrorReason::kManifestWriteNoWAL);
+      } else {
+        // If WAL sync is successful (either WAL size is 0 or there is no IO
+        // error), all the other SST file write errors will be set as
+        // kFlushNoWAL.
+        error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
+      }
+    } else {
+      if (log_io_s.ok()) {
+        Status new_bg_error = s;
+        error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
+      }
+    }
   }
 
   return s;
@@ -644,29 +903,128 @@
 
 Status DBImpl::CompactRange(const CompactRangeOptions& options,
                             ColumnFamilyHandle* column_family,
-                            const Slice* begin, const Slice* end) {
-  auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
+                            const Slice* begin_without_ts,
+                            const Slice* end_without_ts) {
+  if (manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
+    return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
+  }
+
+  if (options.canceled && options.canceled->load(std::memory_order_acquire)) {
+    return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
+  }
+
+  const Comparator* const ucmp = column_family->GetComparator();
+  assert(ucmp);
+  size_t ts_sz = ucmp->timestamp_size();
+  if (ts_sz == 0) {
+    return CompactRangeInternal(options, column_family, begin_without_ts,
+                                end_without_ts);
+  }
+
+  std::string begin_str;
+  std::string end_str;
+
+  // CompactRange compact all keys: [begin, end] inclusively. Add maximum
+  // timestamp to include all `begin` keys, and add minimal timestamp to include
+  // all `end` keys.
+  if (begin_without_ts != nullptr) {
+    AppendKeyWithMaxTimestamp(&begin_str, *begin_without_ts, ts_sz);
+  }
+  if (end_without_ts != nullptr) {
+    AppendKeyWithMinTimestamp(&end_str, *end_without_ts, ts_sz);
+  }
+  Slice begin(begin_str);
+  Slice end(end_str);
+
+  Slice* begin_with_ts = begin_without_ts ? &begin : nullptr;
+  Slice* end_with_ts = end_without_ts ? &end : nullptr;
+
+  return CompactRangeInternal(options, column_family, begin_with_ts,
+                              end_with_ts);
+}
+
+Status DBImpl::IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family,
+                                        std::string ts_low) {
+  ColumnFamilyData* cfd = nullptr;
+  if (column_family == nullptr) {
+    cfd = default_cf_handle_->cfd();
+  } else {
+    auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
+    assert(cfh != nullptr);
+    cfd = cfh->cfd();
+  }
+  assert(cfd != nullptr && cfd->user_comparator() != nullptr);
+  if (cfd->user_comparator()->timestamp_size() == 0) {
+    return Status::InvalidArgument(
+        "Timestamp is not enabled in this column family");
+  }
+  if (cfd->user_comparator()->timestamp_size() != ts_low.size()) {
+    return Status::InvalidArgument("ts_low size mismatch");
+  }
+  return IncreaseFullHistoryTsLowImpl(cfd, ts_low);
+}
+
+Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
+                                            std::string ts_low) {
+  VersionEdit edit;
+  edit.SetColumnFamily(cfd->GetID());
+  edit.SetFullHistoryTsLow(ts_low);
+
+  InstrumentedMutexLock l(&mutex_);
+  std::string current_ts_low = cfd->GetFullHistoryTsLow();
+  const Comparator* ucmp = cfd->user_comparator();
+  assert(ucmp->timestamp_size() == ts_low.size() && !ts_low.empty());
+  if (!current_ts_low.empty() &&
+      ucmp->CompareTimestamp(ts_low, current_ts_low) < 0) {
+    return Status::InvalidArgument(
+        "Cannot decrease full_history_timestamp_low");
+  }
+
+  return versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit,
+                                &mutex_);
+}
+
+Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
+                                    ColumnFamilyHandle* column_family,
+                                    const Slice* begin, const Slice* end) {
+  auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
   auto cfd = cfh->cfd();
 
   if (options.target_path_id >= cfd->ioptions()->cf_paths.size()) {
     return Status::InvalidArgument("Invalid target path ID");
   }
 
-  bool exclusive = options.exclusive_manual_compaction;
-
   bool flush_needed = true;
+
+  // Update full_history_ts_low if it's set
+  if (options.full_history_ts_low != nullptr &&
+      !options.full_history_ts_low->empty()) {
+    std::string ts_low = options.full_history_ts_low->ToString();
+    if (begin != nullptr || end != nullptr) {
+      return Status::InvalidArgument(
+          "Cannot specify compaction range with full_history_ts_low");
+    }
+    Status s = IncreaseFullHistoryTsLowImpl(cfd, ts_low);
+    if (!s.ok()) {
+      LogFlush(immutable_db_options_.info_log);
+      return s;
+    }
+  }
+
+  Status s;
   if (begin != nullptr && end != nullptr) {
     // TODO(ajkr): We could also optimize away the flush in certain cases where
     // one/both sides of the interval are unbounded. But it requires more
     // changes to RangesOverlapWithMemtables.
     Range range(*begin, *end);
     SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
-    cfd->RangesOverlapWithMemtables({range}, super_version, &flush_needed);
+    s = cfd->RangesOverlapWithMemtables(
+        {range}, super_version, immutable_db_options_.allow_data_in_errors,
+        &flush_needed);
     CleanupSuperVersion(super_version);
   }
 
-  Status s;
-  if (flush_needed) {
+  if (s.ok() && flush_needed) {
     FlushOptions fo;
     fo.allow_write_stall = options.allow_write_stall;
     if (immutable_db_options_.atomic_flush) {
@@ -686,25 +1044,9 @@
     }
   }
 
-  int max_level_with_files = 0;
-  // max_file_num_to_ignore can be used to filter out newly created SST files,
-  // useful for bottom level compaction in a manual compaction
-  uint64_t max_file_num_to_ignore = port::kMaxUint64;
-  uint64_t next_file_number = port::kMaxUint64;
-  {
-    InstrumentedMutexLock l(&mutex_);
-    Version* base = cfd->current();
-    for (int level = 1; level < base->storage_info()->num_non_empty_levels();
-         level++) {
-      if (base->storage_info()->OverlapInLevel(level, begin, end)) {
-        max_level_with_files = level;
-      }
-    }
-    next_file_number = versions_->current_next_file_number();
-  }
-
-  int final_output_level = 0;
-
+  constexpr int kInvalidLevel = -1;
+  int final_output_level = kInvalidLevel;
+  bool exclusive = options.exclusive_manual_compaction;
   if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal &&
       cfd->NumberLevels() > 1) {
     // Always compact all files together.
@@ -715,70 +1057,132 @@
     }
     s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
                             final_output_level, options, begin, end, exclusive,
-                            false, max_file_num_to_ignore);
+                            false, port::kMaxUint64);
   } else {
-    for (int level = 0; level <= max_level_with_files; level++) {
-      int output_level;
-      // in case the compaction is universal or if we're compacting the
-      // bottom-most level, the output level will be the same as input one.
-      // level 0 can never be the bottommost level (i.e. if all files are in
-      // level 0, we will compact to level 1)
-      if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
-          cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
-        output_level = level;
-      } else if (level == max_level_with_files && level > 0) {
-        if (options.bottommost_level_compaction ==
-            BottommostLevelCompaction::kSkip) {
-          // Skip bottommost level compaction
-          continue;
-        } else if (options.bottommost_level_compaction ==
-                       BottommostLevelCompaction::kIfHaveCompactionFilter &&
-                   cfd->ioptions()->compaction_filter == nullptr &&
-                   cfd->ioptions()->compaction_filter_factory == nullptr) {
-          // Skip bottommost level compaction since we don't have a compaction
-          // filter
-          continue;
+    int first_overlapped_level = kInvalidLevel;
+    int max_overlapped_level = kInvalidLevel;
+    {
+      SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
+      Version* current_version = super_version->current;
+      ReadOptions ro;
+      ro.total_order_seek = true;
+      bool overlap;
+      for (int level = 0;
+           level < current_version->storage_info()->num_non_empty_levels();
+           level++) {
+        overlap = true;
+        if (begin != nullptr && end != nullptr) {
+          Status status = current_version->OverlapWithLevelIterator(
+              ro, file_options_, *begin, *end, level, &overlap);
+          if (!status.ok()) {
+            overlap = current_version->storage_info()->OverlapInLevel(
+                level, begin, end);
+          }
+        } else {
+          overlap = current_version->storage_info()->OverlapInLevel(level,
+                                                                    begin, end);
         }
-        output_level = level;
-        // update max_file_num_to_ignore only for bottom level compaction
-        // because data in newly compacted files in middle levels may still need
-        // to be pushed down
-        max_file_num_to_ignore = next_file_number;
-      } else {
-        output_level = level + 1;
-        if (cfd->ioptions()->compaction_style == kCompactionStyleLevel &&
-            cfd->ioptions()->level_compaction_dynamic_level_bytes &&
-            level == 0) {
-          output_level = ColumnFamilyData::kCompactToBaseLevel;
+        if (overlap) {
+          if (first_overlapped_level == kInvalidLevel) {
+            first_overlapped_level = level;
+          }
+          max_overlapped_level = level;
         }
       }
-      s = RunManualCompaction(cfd, level, output_level, options, begin, end,
-                              exclusive, false, max_file_num_to_ignore);
-      if (!s.ok()) {
-        break;
-      }
-      if (output_level == ColumnFamilyData::kCompactToBaseLevel) {
-        final_output_level = cfd->NumberLevels() - 1;
-      } else if (output_level > final_output_level) {
-        final_output_level = output_level;
+      CleanupSuperVersion(super_version);
+    }
+    if (s.ok() && first_overlapped_level != kInvalidLevel) {
+      // max_file_num_to_ignore can be used to filter out newly created SST
+      // files, useful for bottom level compaction in a manual compaction
+      uint64_t max_file_num_to_ignore = port::kMaxUint64;
+      uint64_t next_file_number = versions_->current_next_file_number();
+      final_output_level = max_overlapped_level;
+      int output_level;
+      for (int level = first_overlapped_level; level <= max_overlapped_level;
+           level++) {
+        bool disallow_trivial_move = false;
+        // in case the compaction is universal or if we're compacting the
+        // bottom-most level, the output level will be the same as input one.
+        // level 0 can never be the bottommost level (i.e. if all files are in
+        // level 0, we will compact to level 1)
+        if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal ||
+            cfd->ioptions()->compaction_style == kCompactionStyleFIFO) {
+          output_level = level;
+        } else if (level == max_overlapped_level && level > 0) {
+          if (options.bottommost_level_compaction ==
+              BottommostLevelCompaction::kSkip) {
+            // Skip bottommost level compaction
+            continue;
+          } else if (options.bottommost_level_compaction ==
+                         BottommostLevelCompaction::kIfHaveCompactionFilter &&
+                     cfd->ioptions()->compaction_filter == nullptr &&
+                     cfd->ioptions()->compaction_filter_factory == nullptr) {
+            // Skip bottommost level compaction since we don't have a compaction
+            // filter
+            continue;
+          }
+          output_level = level;
+          // update max_file_num_to_ignore only for bottom level compaction
+          // because data in newly compacted files in middle levels may still
+          // need to be pushed down
+          max_file_num_to_ignore = next_file_number;
+        } else {
+          output_level = level + 1;
+          if (cfd->ioptions()->compaction_style == kCompactionStyleLevel &&
+              cfd->ioptions()->level_compaction_dynamic_level_bytes &&
+              level == 0) {
+            output_level = ColumnFamilyData::kCompactToBaseLevel;
+          }
+          // if it's a BottommostLevel compaction and `kForce*` compaction is
+          // set, disallow trivial move
+          if (level == max_overlapped_level &&
+              (options.bottommost_level_compaction ==
+                   BottommostLevelCompaction::kForce ||
+               options.bottommost_level_compaction ==
+                   BottommostLevelCompaction::kForceOptimized)) {
+            disallow_trivial_move = true;
+          }
+        }
+        s = RunManualCompaction(cfd, level, output_level, options, begin, end,
+                                exclusive, disallow_trivial_move,
+                                max_file_num_to_ignore);
+        if (!s.ok()) {
+          break;
+        }
+        if (output_level == ColumnFamilyData::kCompactToBaseLevel) {
+          final_output_level = cfd->NumberLevels() - 1;
+        } else if (output_level > final_output_level) {
+          final_output_level = output_level;
+        }
+        TEST_SYNC_POINT("DBImpl::RunManualCompaction()::1");
+        TEST_SYNC_POINT("DBImpl::RunManualCompaction()::2");
       }
-      TEST_SYNC_POINT("DBImpl::RunManualCompaction()::1");
-      TEST_SYNC_POINT("DBImpl::RunManualCompaction()::2");
     }
   }
-  if (!s.ok()) {
+  if (!s.ok() || final_output_level == kInvalidLevel) {
     LogFlush(immutable_db_options_.info_log);
     return s;
   }
 
   if (options.change_level) {
+    TEST_SYNC_POINT("DBImpl::CompactRange:BeforeRefit:1");
+    TEST_SYNC_POINT("DBImpl::CompactRange:BeforeRefit:2");
+
     ROCKS_LOG_INFO(immutable_db_options_.info_log,
                    "[RefitLevel] waiting for background threads to stop");
+    DisableManualCompaction();
     s = PauseBackgroundWork();
     if (s.ok()) {
+      TEST_SYNC_POINT("DBImpl::CompactRange:PreRefitLevel");
       s = ReFitLevel(cfd, final_output_level, options.target_level);
-    }
-    ContinueBackgroundWork();
+      TEST_SYNC_POINT("DBImpl::CompactRange:PostRefitLevel");
+      // ContinueBackgroundWork always return Status::OK().
+      Status temp_s = ContinueBackgroundWork();
+      assert(temp_s.ok());
+    }
+    EnableManualCompaction();
+    TEST_SYNC_POINT(
+        "DBImpl::CompactRange:PostRefitLevel:ManualCompactionEnabled");
   }
   LogFlush(immutable_db_options_.info_log);
@@ -813,11 +1217,12 @@
     return Status::InvalidArgument("ColumnFamilyHandle must be non-null.");
   }
 
-  auto cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
+  auto cfd =
+      static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
   assert(cfd);
 
   Status s;
-  JobContext job_context(0, true);
+  JobContext job_context(next_job_id_.fetch_add(1), true);
   LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
                        immutable_db_options_.info_log.get());
@@ -884,7 +1289,7 @@
   if (shutting_down_.load(std::memory_order_acquire)) {
     return Status::ShutdownInProgress();
   }
-  if (manual_compaction_paused_.load(std::memory_order_acquire)) {
+  if (manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
     return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
   }
@@ -946,7 +1351,7 @@
   assert(cfd->compaction_picker());
   c.reset(cfd->compaction_picker()->CompactFiles(
       compact_options, input_files, output_level, version->storage_info(),
-      *cfd->GetLatestMutableCFOptions(), output_path_id));
+      *cfd->GetLatestMutableCFOptions(), mutable_db_options_, output_path_id));
   // we already sanitized the set of input files and checked for conflicts
   // without releasing the lock, so we're guaranteed a compaction can be formed.
   assert(c != nullptr);
@@ -968,15 +1373,18 @@
   assert(is_snapshot_supported_ || snapshots_.empty());
   CompactionJobStats compaction_job_stats;
   CompactionJob compaction_job(
-      job_context->job_id, c.get(), immutable_db_options_,
+      job_context->job_id, c.get(), immutable_db_options_, mutable_db_options_,
       file_options_for_compaction_, versions_.get(), &shutting_down_,
       preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
-      GetDataDir(c->column_family_data(), c->output_path_id()), stats_, &mutex_,
-      &error_handler_, snapshot_seqs, earliest_write_conflict_snapshot,
-      snapshot_checker, table_cache_, &event_logger_,
+      GetDataDir(c->column_family_data(), c->output_path_id()),
+      GetDataDir(c->column_family_data(), 0), stats_, &mutex_, &error_handler_,
+      snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
+      table_cache_, &event_logger_,
       c->mutable_cf_options()->paranoid_file_checks,
       c->mutable_cf_options()->report_bg_io_stats, dbname_,
-      &compaction_job_stats, Env::Priority::USER, &manual_compaction_paused_);
+      &compaction_job_stats, Env::Priority::USER, io_tracer_,
+      &manual_compaction_paused_, nullptr, db_id_, db_session_id_,
+      c->column_family_data()->GetFullHistoryTsLow(), &blob_callback_);
 
   // Creating a compaction influences the compaction score because the score
   // takes running compactions into account (by skipping files that are already
@@ -990,17 +1398,23 @@
   mutex_.Unlock();
   TEST_SYNC_POINT("CompactFilesImpl:0");
   TEST_SYNC_POINT("CompactFilesImpl:1");
-  compaction_job.Run();
+  // Ignore the status here, as it will be checked in the Install down below...
+  compaction_job.Run().PermitUncheckedError();
   TEST_SYNC_POINT("CompactFilesImpl:2");
   TEST_SYNC_POINT("CompactFilesImpl:3");
   mutex_.Lock();
 
   Status status = compaction_job.Install(*c->mutable_cf_options());
   if (status.ok()) {
+    assert(compaction_job.io_status().ok());
     InstallSuperVersionAndScheduleWork(c->column_family_data(),
                                        &job_context->superversion_contexts[0],
                                        *c->mutable_cf_options());
   }
+  // status above captures any error during compaction_job.Install, so its ok
+  // not check compaction_job.io_status() explicitly if we're not calling
+  // SetBGError
+  compaction_job.io_status().PermitUncheckedError();
   c->ReleaseCompactionFiles(s);
 #ifndef ROCKSDB_LITE
   // Need to make sure SstFileManager does its bookkeeping
@@ -1033,15 +1447,25 @@
                    "[%s] [JOB %d] Compaction error: %s",
                    c->column_family_data()->GetName().c_str(),
                    job_context->job_id, status.ToString().c_str());
-    error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
+    IOStatus io_s = compaction_job.io_status();
+    if (!io_s.ok()) {
+      error_handler_.SetBGError(io_s, BackgroundErrorReason::kCompaction);
+    } else {
+      error_handler_.SetBGError(status, BackgroundErrorReason::kCompaction);
+    }
   }
 
   if (output_file_names != nullptr) {
-    for (const auto newf : c->edit()->GetNewFiles()) {
-      (*output_file_names)
-          .push_back(TableFileName(c->immutable_cf_options()->cf_paths,
-                                   newf.second.fd.GetNumber(),
-                                   newf.second.fd.GetPathId()));
+    for (const auto& newf : c->edit()->GetNewFiles()) {
+      output_file_names->push_back(TableFileName(
+          c->immutable_options()->cf_paths, newf.second.fd.GetNumber(),
+          newf.second.fd.GetPathId()));
+    }
+
+    for (const auto& blob_file : c->edit()->GetBlobFileAdditions()) {
+      output_file_names->push_back(
+          BlobFileName(c->immutable_options()->cf_paths.front().path,
+                       blob_file.GetBlobFileNumber()));
     }
   }
 
@@ -1099,9 +1523,11 @@
     return;
   }
   if (c->is_manual_compaction() &&
-      manual_compaction_paused_.load(std::memory_order_acquire)) {
+      manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
     return;
   }
+
+  c->SetNotifyOnCompactionCompleted();
   Version* current = cfd->current();
   current->Ref();
   // release lock while notifying events
@@ -1109,46 +1535,11 @@
   TEST_SYNC_POINT("DBImpl::NotifyOnCompactionBegin::UnlockMutex");
   {
     CompactionJobInfo info{};
-    info.cf_name = cfd->GetName();
-    info.status = st;
-    info.thread_id = env_->GetThreadID();
-    info.job_id = job_id;
-    info.base_input_level = c->start_level();
-    info.output_level = c->output_level();
-    info.stats = job_stats;
-    info.table_properties = c->GetOutputTableProperties();
-    info.compaction_reason = c->compaction_reason();
-    info.compression = c->output_compression();
-    for (size_t i = 0; i < c->num_input_levels(); ++i) {
-      for (const auto fmd : *c->inputs(i)) {
-        const FileDescriptor& desc = fmd->fd;
-        const uint64_t file_number = desc.GetNumber();
-        auto fn = TableFileName(c->immutable_cf_options()->cf_paths,
-                                file_number, desc.GetPathId());
-        info.input_files.push_back(fn);
-        info.input_file_infos.push_back(CompactionFileInfo{
-            static_cast<int>(i), file_number, fmd->oldest_blob_file_number});
-        if (info.table_properties.count(fn) == 0) {
-          std::shared_ptr<const TableProperties> tp;
-          auto s = current->GetTableProperties(&tp, fmd, &fn);
-          if (s.ok()) {
-            info.table_properties[fn] = tp;
-          }
-        }
-      }
-    }
-    for (const auto newf : c->edit()->GetNewFiles()) {
-      const FileMetaData& meta = newf.second;
-      const FileDescriptor& desc = meta.fd;
-      const uint64_t file_number = desc.GetNumber();
-      info.output_files.push_back(TableFileName(
-          c->immutable_cf_options()->cf_paths, file_number, desc.GetPathId()));
-      info.output_file_infos.push_back(CompactionFileInfo{
-          newf.first, file_number, meta.oldest_blob_file_number});
-    }
+    BuildCompactionJobInfo(cfd, c, st, job_stats, job_id, current, &info);
     for (auto listener : immutable_db_options_.listeners) {
       listener->OnCompactionBegin(this, info);
     }
+    info.status.PermitUncheckedError();
   }
   mutex_.Lock();
   current->Unref();
@@ -1172,10 +1563,11 @@
   if (shutting_down_.load(std::memory_order_acquire)) {
     return;
   }
-  if (c->is_manual_compaction() &&
-      manual_compaction_paused_.load(std::memory_order_acquire)) {
+
+  if (c->ShouldNotifyOnCompactionCompleted() == false) {
     return;
   }
+
   Version* current = cfd->current();
   current->Ref();
   // release lock while notifying events
@@ -1212,8 +1604,6 @@
 
   SuperVersionContext sv_context(/* create_superversion */ true);
 
-  Status status;
-
   InstrumentedMutexLock guard_lock(&mutex_);
 
   // only allow one thread refitting
@@ -1232,20 +1622,32 @@
   }
 
   auto* vstorage = cfd->current()->storage_info();
-  if (to_level > level) {
-    if (level == 0) {
-      return Status::NotSupported(
-          "Cannot change from level 0 to other levels.");
-    }
-    // Check levels are empty for a trivial move
-    for (int l = level + 1; l <= to_level; l++) {
-      if (vstorage->NumLevelFiles(l) > 0) {
+  if (to_level != level) {
+    if (to_level > level) {
+      if (level == 0) {
+        refitting_level_ = false;
         return Status::NotSupported(
-            "Levels between source and target are not empty for a move.");
+            "Cannot change from level 0 to other levels.");
+      }
+      // Check levels are empty for a trivial move
+      for (int l = level + 1; l <= to_level; l++) {
+        if (vstorage->NumLevelFiles(l) > 0) {
+          refitting_level_ = false;
+          return Status::NotSupported(
+              "Levels between source and target are not empty for a move.");
+        }
+      }
+    } else {
+      // to_level < level
+      // Check levels are empty for a trivial move
+      for (int l = to_level; l < level; l++) {
+        if (vstorage->NumLevelFiles(l) > 0) {
+          refitting_level_ = false;
+          return Status::NotSupported(
+              "Levels between source and target are not empty for a move.");
+        }
       }
     }
-  }
-  if (to_level != level) {
     ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                     "[%s] Before refitting:\n%s", cfd->GetName().c_str(),
                     cfd->current()->DebugString().data());
@@ -1254,19 +1656,20 @@
     edit.SetColumnFamily(cfd->GetID());
     for (const auto& f : vstorage->LevelFiles(level)) {
       edit.DeleteFile(level, f->fd.GetNumber());
-      edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(),
-                   f->fd.GetFileSize(), f->smallest, f->largest,
-                   f->fd.smallest_seqno, f->fd.largest_seqno,
-                   f->marked_for_compaction, f->oldest_blob_file_number,
-                   f->oldest_ancester_time, f->file_creation_time,
-                   f->file_checksum, f->file_checksum_func_name);
+      edit.AddFile(
+          to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(),
+          f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
+          f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
+          f->oldest_ancester_time, f->file_creation_time, f->file_checksum,
+          f->file_checksum_func_name, f->min_timestamp, f->max_timestamp);
     }
 
     ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                     "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
                     edit.DebugString().data());
 
-    status = versions_->LogAndApply(cfd, mutable_cf_options, &edit, &mutex_,
-                                    directories_.GetDbDir());
+    Status status = versions_->LogAndApply(cfd, mutable_cf_options, &edit,
+                                           &mu