Version in base suite: 0.7.12-2+deb10u1 Base version: zfs-linux_0.7.12-2+deb10u1 Target version: zfs-linux_0.7.12-2+deb10u2 Base file: /srv/ftp-master.debian.org/ftp/pool/contrib/z/zfs-linux/zfs-linux_0.7.12-2+deb10u1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/contrib/z/zfs-linux/zfs-linux_0.7.12-2+deb10u2.dsc changelog | 9 control | 2 control.in | 2 patches/01937958ce85b1cd8942dbaf9a3f9768c5b02a0a.patch | 327 ++++++++++++++ patches/98bb45e27ae80145a6ce028df90fccdb23f8901d.patch | 373 +++++++++++++++++ patches/series | 2 6 files changed, 713 insertions(+), 2 deletions(-) diff: /srv/release.debian.org/tmp/iQWeu0Truw/zfs-linux-0.7.12/debian/zfs-zed.zfs-zed.init: No such file or directory diff: /srv/release.debian.org/tmp/WLIesym5rr/zfs-linux-0.7.12/debian/zfs-zed.zfs-zed.init: No such file or directory diff: /srv/release.debian.org/tmp/iQWeu0Truw/zfs-linux-0.7.12/debian/zfsutils-linux.zfs-import.init: No such file or directory diff: /srv/release.debian.org/tmp/WLIesym5rr/zfs-linux-0.7.12/debian/zfsutils-linux.zfs-import.init: No such file or directory diff: /srv/release.debian.org/tmp/iQWeu0Truw/zfs-linux-0.7.12/debian/zfsutils-linux.zfs-mount.init: No such file or directory diff: /srv/release.debian.org/tmp/WLIesym5rr/zfs-linux-0.7.12/debian/zfsutils-linux.zfs-mount.init: No such file or directory diff: /srv/release.debian.org/tmp/iQWeu0Truw/zfs-linux-0.7.12/debian/zfsutils-linux.zfs-share.init: No such file or directory diff: /srv/release.debian.org/tmp/WLIesym5rr/zfs-linux-0.7.12/debian/zfsutils-linux.zfs-share.init: No such file or directory diff -Nru zfs-linux-0.7.12/debian/changelog zfs-linux-0.7.12/debian/changelog --- zfs-linux-0.7.12/debian/changelog 2019-06-09 03:17:40.000000000 +0000 +++ zfs-linux-0.7.12/debian/changelog 2020-04-28 07:08:05.000000000 +0000 @@ -1,3 +1,12 @@ +zfs-linux (0.7.12-2+deb10u2) buster; urgency=medium + + * Cherry-pick two upstream patches to fix potential deadlock issues. + + 01937958ce85b1cd8942dbaf9a3f9768c5b02a0a + + 98bb45e27ae80145a6ce028df90fccdb23f8901d + * Let zfs-dkms pre-depends on spl-dkms (>= 0.7.12-2+deb10u1). + + -- Mo Zhou Tue, 28 Apr 2020 15:08:05 +0800 + zfs-linux (0.7.12-2+deb10u1) testing-proposed-updates; urgency=high * Patch: Disable SIMD on 4.19.37+ or 5.0+ kernels. (Closes: #929929) diff -Nru zfs-linux-0.7.12/debian/control zfs-linux-0.7.12/debian/control --- zfs-linux-0.7.12/debian/control 2019-06-09 03:17:40.000000000 +0000 +++ zfs-linux-0.7.12/debian/control 2020-04-28 07:08:05.000000000 +0000 @@ -99,7 +99,7 @@ Package: zfs-dkms Architecture: all Pre-Depends: spl-dkms (<<${source:Upstream-Version}.), - spl-dkms (>= ${source:Upstream-Version}) + spl-dkms (>= ${source:Upstream-Version}), spl-dkms (>= 0.7.12-2+deb10u1), Depends: dkms (>> 2.1.1.2-5), lsb-release, ${misc:Depends}, ${perl:Depends}, Recommends: zfs-zed, zfsutils-linux (>= ${binary:Version}), linux-libc-dev (<< 4.20~) Provides: zfs-modules diff -Nru zfs-linux-0.7.12/debian/control.in zfs-linux-0.7.12/debian/control.in --- zfs-linux-0.7.12/debian/control.in 2019-06-09 03:17:33.000000000 +0000 +++ zfs-linux-0.7.12/debian/control.in 2020-04-27 07:34:10.000000000 +0000 @@ -99,7 +99,7 @@ Package: zfs-dkms Architecture: all Pre-Depends: spl-dkms (<<${source:Upstream-Version}.), - spl-dkms (>= ${source:Upstream-Version}) + spl-dkms (>= ${source:Upstream-Version}), spl-dkms (>= 0.7.12-2+deb10u1), Depends: dkms (>> 2.1.1.2-5), lsb-release, ${misc:Depends}, ${perl:Depends}, Recommends: zfs-zed, zfsutils-linux (>= ${binary:Version}), @LINUX_COMPAT@ Provides: zfs-modules diff -Nru zfs-linux-0.7.12/debian/patches/01937958ce85b1cd8942dbaf9a3f9768c5b02a0a.patch zfs-linux-0.7.12/debian/patches/01937958ce85b1cd8942dbaf9a3f9768c5b02a0a.patch --- zfs-linux-0.7.12/debian/patches/01937958ce85b1cd8942dbaf9a3f9768c5b02a0a.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.7.12/debian/patches/01937958ce85b1cd8942dbaf9a3f9768c5b02a0a.patch 2020-04-22 05:11:49.000000000 +0000 @@ -0,0 +1,327 @@ +From 01937958ce85b1cd8942dbaf9a3f9768c5b02a0a Mon Sep 17 00:00:00 2001 +From: Matthew Ahrens +Date: Thu, 31 May 2018 10:29:12 -0700 +Subject: [PATCH] OpenZFS 9577 - remove zfs_dbuf_evict_key tsd + +The zfs_dbuf_evict_key TSD (thread-specific data) is not necessary - +we can instead pass a flag down in a few places to prevent recursive +dbuf eviction. Making this change has 3 benefits: + +1. The code semantics are easier to understand. +2. On Linux, performance is improved, because creating/removing + TSD values (by setting to NULL vs non-NULL) is expensive, and + we do it very often. +3. According to Nexenta, the current semantics can cause a + deadlock when concurrently calling dmu_objset_evict_dbufs() + (which is rare today, but they are working on a "parallel + unmount" change that triggers this more easily): + +Porting Notes: +* Minor conflict with OpenZFS 9337 which has not yet been ported. + +Authored by: Matthew Ahrens +Reviewed by: George Wilson +Reviewed by: Serapheim Dimitropoulos +Reviewed by: Brad Lewis +Reviewed-by: George Melikov +Ported-by: Brian Behlendorf + +OpenZFS-issue: https://illumos.org/issues/9577 +OpenZFS-commit: https://github.com/openzfs/openzfs/pull/645 +External-issue: DLPX-58547 +Closes #7602 +--- + include/sys/dbuf.h | 4 +-- + include/sys/dnode.h | 4 +-- + module/zfs/dbuf.c | 69 ++++++++++++++--------------------------- + module/zfs/dnode.c | 7 +++-- + module/zfs/dnode_sync.c | 17 ++++++++-- + 5 files changed, 46 insertions(+), 55 deletions(-) + +diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h +index 127acad33c7..e007e97644e 100644 +--- a/include/sys/dbuf.h ++++ b/include/sys/dbuf.h +@@ -20,7 +20,7 @@ + */ + /* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2012, 2015 by Delphix. All rights reserved. ++ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + */ +@@ -294,7 +294,7 @@ boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj, + uint64_t dbuf_refcount(dmu_buf_impl_t *db); + + void dbuf_rele(dmu_buf_impl_t *db, void *tag); +-void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag); ++void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting); + + dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, + uint64_t blkid); +diff --git a/include/sys/dnode.h b/include/sys/dnode.h +index 1e77e0a32ec..4e006df5535 100644 +--- a/include/sys/dnode.h ++++ b/include/sys/dnode.h +@@ -20,7 +20,7 @@ + */ + /* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2012, 2017 by Delphix. All rights reserved. ++ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + */ + +@@ -339,7 +339,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots, + void *ref, dnode_t **dnp); + boolean_t dnode_add_ref(dnode_t *dn, void *ref); + void dnode_rele(dnode_t *dn, void *ref); +-void dnode_rele_and_unlock(dnode_t *dn, void *tag); ++void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting); + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); + void dnode_sync(dnode_t *dn, dmu_tx_t *tx); + void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, +diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c +index 62b77bb0a1d..9694ce78b19 100644 +--- a/module/zfs/dbuf.c ++++ b/module/zfs/dbuf.c +@@ -72,8 +72,6 @@ static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh, + void *tag, dmu_buf_impl_t **dbp, int depth); + static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh); + +-uint_t zfs_dbuf_evict_key; +- + static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); + static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); + +@@ -505,14 +503,6 @@ dbuf_evict_one(void) + dmu_buf_impl_t *db; + ASSERT(!MUTEX_HELD(&dbuf_evict_lock)); + +- /* +- * Set the thread's tsd to indicate that it's processing evictions. +- * Once a thread stops evicting from the dbuf cache it will +- * reset its tsd to NULL. +- */ +- ASSERT3P(tsd_get(zfs_dbuf_evict_key), ==, NULL); +- (void) tsd_set(zfs_dbuf_evict_key, (void *)B_TRUE); +- + db = multilist_sublist_tail(mls); + while (db != NULL && mutex_tryenter(&db->db_mtx) == 0) { + db = multilist_sublist_prev(mls, db); +@@ -530,7 +520,6 @@ dbuf_evict_one(void) + } else { + multilist_sublist_unlock(mls); + } +- (void) tsd_set(zfs_dbuf_evict_key, NULL); + } + + /* +@@ -583,29 +572,6 @@ dbuf_evict_thread(void) + static void + dbuf_evict_notify(void) + { +- +- /* +- * We use thread specific data to track when a thread has +- * started processing evictions. This allows us to avoid deeply +- * nested stacks that would have a call flow similar to this: +- * +- * dbuf_rele()-->dbuf_rele_and_unlock()-->dbuf_evict_notify() +- * ^ | +- * | | +- * +-----dbuf_destroy()<--dbuf_evict_one()<--------+ +- * +- * The dbuf_eviction_thread will always have its tsd set until +- * that thread exits. All other threads will only set their tsd +- * if they are participating in the eviction process. This only +- * happens if the eviction thread is unable to process evictions +- * fast enough. To keep the dbuf cache size in check, other threads +- * can evict from the dbuf cache directly. Those threads will set +- * their tsd values so that we ensure that they only evict one dbuf +- * from the dbuf cache. +- */ +- if (tsd_get(zfs_dbuf_evict_key) != NULL) +- return; +- + /* + * We check if we should evict without holding the dbuf_evict_lock, + * because it's OK to occasionally make the wrong decision here, +@@ -681,7 +647,6 @@ dbuf_init(void) + dbuf_cache_multilist_index_func); + zfs_refcount_create(&dbuf_cache_size); + +- tsd_create(&zfs_dbuf_evict_key, NULL); + dbuf_evict_thread_exit = B_FALSE; + mutex_init(&dbuf_evict_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&dbuf_evict_cv, NULL, CV_DEFAULT, NULL); +@@ -718,7 +683,6 @@ dbuf_fini(void) + cv_wait(&dbuf_evict_cv, &dbuf_evict_lock); + } + mutex_exit(&dbuf_evict_lock); +- tsd_destroy(&zfs_dbuf_evict_key); + + mutex_destroy(&dbuf_evict_lock); + cv_destroy(&dbuf_evict_cv); +@@ -1004,7 +968,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) + db->db_state = DB_UNCACHED; + } + cv_broadcast(&db->db_changed); +- dbuf_rele_and_unlock(db, NULL); ++ dbuf_rele_and_unlock(db, NULL, B_FALSE); + } + + static int +@@ -2178,7 +2142,8 @@ dbuf_destroy(dmu_buf_impl_t *db) + * value in dnode_move(), since DB_DNODE_EXIT doesn't actually + * release any lock. + */ +- dnode_rele(dn, db); ++ mutex_enter(&dn->dn_mtx); ++ dnode_rele_and_unlock(dn, db, B_TRUE); + db->db_dnode_handle = NULL; + + dbuf_hash_remove(db); +@@ -2204,8 +2169,10 @@ dbuf_destroy(dmu_buf_impl_t *db) + * If this dbuf is referenced from an indirect dbuf, + * decrement the ref count on the indirect dbuf. + */ +- if (parent && parent != dndb) +- dbuf_rele(parent, db); ++ if (parent && parent != dndb) { ++ mutex_enter(&parent->db_mtx); ++ dbuf_rele_and_unlock(parent, db, B_TRUE); ++ } + } + + /* +@@ -2912,7 +2879,7 @@ void + dbuf_rele(dmu_buf_impl_t *db, void *tag) + { + mutex_enter(&db->db_mtx); +- dbuf_rele_and_unlock(db, tag); ++ dbuf_rele_and_unlock(db, tag, B_FALSE); + } + + void +@@ -2923,10 +2890,19 @@ dmu_buf_rele(dmu_buf_t *db, void *tag) + + /* + * dbuf_rele() for an already-locked dbuf. This is necessary to allow +- * db_dirtycnt and db_holds to be updated atomically. ++ * db_dirtycnt and db_holds to be updated atomically. The 'evicting' ++ * argument should be set if we are already in the dbuf-evicting code ++ * path, in which case we don't want to recursively evict. This allows us to ++ * avoid deeply nested stacks that would have a call flow similar to this: ++ * ++ * dbuf_rele()-->dbuf_rele_and_unlock()-->dbuf_evict_notify() ++ * ^ | ++ * | | ++ * +-----dbuf_destroy()<--dbuf_evict_one()<--------+ ++ * + */ + void +-dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) ++dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) + { + int64_t holds; + +@@ -3021,7 +2997,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) + db->db.db_size, db); + mutex_exit(&db->db_mtx); + +- dbuf_evict_notify(); ++ if (!evicting) ++ dbuf_evict_notify(); + } + + if (do_arc_evict) +@@ -3314,7 +3291,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) + kmem_free(dr, sizeof (dbuf_dirty_record_t)); + ASSERT(db->db_dirtycnt > 0); + db->db_dirtycnt -= 1; +- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); ++ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE); + return; + } + +@@ -3670,7 +3647,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) + ASSERT(db->db_dirtycnt > 0); + db->db_dirtycnt -= 1; + db->db_data_pending = NULL; +- dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg); ++ dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE); + } + + static void +diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c +index 989a8ec7f69..b80aeb8b1f8 100644 +--- a/module/zfs/dnode.c ++++ b/module/zfs/dnode.c +@@ -1533,11 +1533,11 @@ void + dnode_rele(dnode_t *dn, void *tag) + { + mutex_enter(&dn->dn_mtx); +- dnode_rele_and_unlock(dn, tag); ++ dnode_rele_and_unlock(dn, tag, B_FALSE); + } + + void +-dnode_rele_and_unlock(dnode_t *dn, void *tag) ++dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) + { + uint64_t refs; + /* Get while the hold prevents the dnode from moving. */ +@@ -1568,7 +1568,8 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag) + * that the handle has zero references, but that will be + * asserted anyway when the handle gets destroyed. + */ +- dbuf_rele(db, dnh); ++ mutex_enter(&db->db_mtx); ++ dbuf_rele_and_unlock(db, dnh, evicting); + } + } + +diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c +index 2febb520630..ee86c13b1b2 100644 +--- a/module/zfs/dnode_sync.c ++++ b/module/zfs/dnode_sync.c +@@ -21,7 +21,7 @@ + + /* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2012, 2017 by Delphix. All rights reserved. ++ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + */ + +@@ -429,6 +429,19 @@ dnode_evict_dbufs(dnode_t *dn) + avl_insert_here(&dn->dn_dbufs, db_marker, db, + AVL_BEFORE); + ++ /* ++ * We need to use the "marker" dbuf rather than ++ * simply getting the next dbuf, because ++ * dbuf_destroy() may actually remove multiple dbufs. ++ * It can call itself recursively on the parent dbuf, ++ * which may also be removed from dn_dbufs. The code ++ * flow would look like: ++ * ++ * dbuf_destroy(): ++ * dnode_rele_and_unlock(parent_dbuf, evicting=TRUE): ++ * if (!cacheable || pending_evict) ++ * dbuf_destroy() ++ */ + dbuf_destroy(db); + + db_next = AVL_NEXT(&dn->dn_dbufs, db_marker); +@@ -489,7 +502,7 @@ dnode_undirty_dbufs(list_t *list) + list_destroy(&dr->dt.di.dr_children); + } + kmem_free(dr, sizeof (dbuf_dirty_record_t)); +- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); ++ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE); + } + } + diff -Nru zfs-linux-0.7.12/debian/patches/98bb45e27ae80145a6ce028df90fccdb23f8901d.patch zfs-linux-0.7.12/debian/patches/98bb45e27ae80145a6ce028df90fccdb23f8901d.patch --- zfs-linux-0.7.12/debian/patches/98bb45e27ae80145a6ce028df90fccdb23f8901d.patch 1970-01-01 00:00:00.000000000 +0000 +++ zfs-linux-0.7.12/debian/patches/98bb45e27ae80145a6ce028df90fccdb23f8901d.patch 2020-04-22 05:12:02.000000000 +0000 @@ -0,0 +1,373 @@ +From 98bb45e27ae80145a6ce028df90fccdb23f8901d Mon Sep 17 00:00:00 2001 +From: ilbsmart +Date: Wed, 17 Oct 2018 02:11:24 +0800 +Subject: [PATCH] deadlock between mm_sem and tx assign in zfs_write() and page + fault + +The bug time sequence: +1. thread #1, `zfs_write` assign a txg "n". +2. In a same process, thread #2, mmap page fault (which means the + `mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed, + and wait previous txg "n" completed. +3. thread #1 call `uiomove` to write, however page fault is occurred + in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by + thread #2, so it stuck and can't complete, then txg "n" will + not complete. + +So thread #1 and thread #2 are deadlocked. + +Reviewed-by: Chunwei Chen +Reviewed-by: Brian Behlendorf +Reviewed-by: Matthew Ahrens +Signed-off-by: Grady Wong +Closes #7939 +--- + include/sys/uio_impl.h | 2 +- + module/zcommon/zfs_uio.c | 31 +++- + module/zfs/zfs_vnops.c | 24 ++- + tests/zfs-tests/cmd/mmapwrite/mmapwrite.c | 140 +++++++++++++----- + .../functional/mmap/mmap_write_001_pos.ksh | 8 +- + 5 files changed, 151 insertions(+), 54 deletions(-) + +diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h +index 37e283da0f8..cfef0b95dbb 100644 +--- a/include/sys/uio_impl.h ++++ b/include/sys/uio_impl.h +@@ -42,7 +42,7 @@ + #include + + extern int uiomove(void *, size_t, enum uio_rw, uio_t *); +-extern void uio_prefaultpages(ssize_t, uio_t *); ++extern int uio_prefaultpages(ssize_t, uio_t *); + extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *); + extern void uioskip(uio_t *, size_t); + +diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c +index 7b4175bbeee..8e969bbcc00 100644 +--- a/module/zcommon/zfs_uio.c ++++ b/module/zcommon/zfs_uio.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + + /* + * Move "n" bytes at byte address "p"; "rw" indicates the direction +@@ -77,8 +78,24 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio) + if (copy_to_user(iov->iov_base+skip, p, cnt)) + return (EFAULT); + } else { +- if (copy_from_user(p, iov->iov_base+skip, cnt)) +- return (EFAULT); ++ if (uio->uio_fault_disable) { ++ if (!access_ok(VERIFY_READ, ++ (iov->iov_base + skip), cnt)) { ++ return (EFAULT); ++ } ++ ++ pagefault_disable(); ++ if (__copy_from_user_inatomic(p, ++ (iov->iov_base + skip), cnt)) { ++ pagefault_enable(); ++ return (EFAULT); ++ } ++ pagefault_enable(); ++ } else { ++ if (copy_from_user(p, ++ (iov->iov_base + skip), cnt)) ++ return (EFAULT); ++ } + } + break; + case UIO_SYSSPACE: +@@ -156,7 +173,7 @@ EXPORT_SYMBOL(uiomove); + * error will terminate the process as this is only a best attempt to get + * the pages resident. + */ +-void ++int + uio_prefaultpages(ssize_t n, struct uio *uio) + { + const struct iovec *iov; +@@ -170,7 +187,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio) + switch (uio->uio_segflg) { + case UIO_SYSSPACE: + case UIO_BVEC: +- return; ++ return (0); + case UIO_USERSPACE: + case UIO_USERISPACE: + break; +@@ -194,7 +211,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio) + p = iov->iov_base + skip; + while (cnt) { + if (fuword8((uint8_t *)p, &tmp)) +- return; ++ return (EFAULT); + incr = MIN(cnt, PAGESIZE); + p += incr; + cnt -= incr; +@@ -204,8 +221,10 @@ uio_prefaultpages(ssize_t n, struct uio *uio) + */ + p--; + if (fuword8((uint8_t *)p, &tmp)) +- return; ++ return (EFAULT); + } ++ ++ return (0); + } + EXPORT_SYMBOL(uio_prefaultpages); + +diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c +index 5a2e55eb19a..c866352d765 100644 +--- a/module/zfs/zfs_vnops.c ++++ b/module/zfs/zfs_vnops.c +@@ -675,7 +675,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) + xuio = (xuio_t *)uio; + else + #endif +- uio_prefaultpages(MIN(n, max_blksz), uio); ++ if (uio_prefaultpages(MIN(n, max_blksz), uio)) { ++ ZFS_EXIT(zfsvfs); ++ return (SET_ERROR(EFAULT)); ++ } + + /* + * If in append mode, set the io offset pointer to eof. +@@ -820,8 +823,19 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) + + if (abuf == NULL) { + tx_bytes = uio->uio_resid; ++ uio->uio_fault_disable = B_TRUE; + error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, nbytes, tx); ++ if (error == EFAULT) { ++ dmu_tx_commit(tx); ++ if (uio_prefaultpages(MIN(n, max_blksz), uio)) { ++ break; ++ } ++ continue; ++ } else if (error != 0) { ++ dmu_tx_commit(tx); ++ break; ++ } + tx_bytes -= uio->uio_resid; + } else { + tx_bytes = nbytes; +@@ -921,8 +935,12 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) + ASSERT(tx_bytes == nbytes); + n -= nbytes; + +- if (!xuio && n > 0) +- uio_prefaultpages(MIN(n, max_blksz), uio); ++ if (!xuio && n > 0) { ++ if (uio_prefaultpages(MIN(n, max_blksz), uio)) { ++ error = EFAULT; ++ break; ++ } ++ } + } + + zfs_inode_update(zp); +diff --git a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c +index 190d31af3aa..b9915d5d31e 100644 +--- a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c ++++ b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c +@@ -31,74 +31,132 @@ + #include + #include + #include ++#include ++#include + + /* + * -------------------------------------------------------------------- +- * Bug Id: 5032643 ++ * Bug Issue Id: #7512 ++ * The bug time sequence: ++ * 1. context #1, zfs_write assign a txg "n". ++ * 2. In the same process, context #2, mmap page fault (which means the mm_sem ++ * is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous ++ * txg "n" completed. ++ * 3. context #1 call uiomove to write, however page fault is occurred in ++ * uiomove, which means it need mm_sem, but mm_sem is hold by ++ * context #2, so it stuck and can't complete, then txg "n" will not ++ * complete. + * +- * Simply writing to a file and mmaping that file at the same time can +- * result in deadlock. Nothing perverse like writing from the file's +- * own mapping is required. ++ * So context #1 and context #2 trap into the "dead lock". + * -------------------------------------------------------------------- + */ + ++#define NORMAL_WRITE_TH_NUM 2 ++ + static void * +-mapper(void *fdp) ++normal_writer(void *filename) + { +- void *addr; +- int fd = *(int *)fdp; ++ char *file_path = filename; ++ int fd = -1; ++ ssize_t write_num = 0; ++ int page_size = getpagesize(); + +- if ((addr = +- mmap(0, 8192, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { +- perror("mmap"); +- exit(1); ++ fd = open(file_path, O_RDWR | O_CREAT, 0777); ++ if (fd == -1) { ++ err(1, "failed to open %s", file_path); + } +- for (;;) { +- if (mmap(addr, 8192, PROT_READ, +- MAP_SHARED|MAP_FIXED, fd, 0) == MAP_FAILED) { +- perror("mmap"); +- exit(1); ++ ++ char *buf = malloc(1); ++ while (1) { ++ write_num = write(fd, buf, 1); ++ if (write_num == 0) { ++ err(1, "write failed!"); ++ break; + } ++ lseek(fd, page_size, SEEK_CUR); ++ } ++ ++ if (buf) { ++ free(buf); + } +- /* NOTREACHED */ +- return ((void *)1); + } + +-int +-main(int argc, char **argv) ++static void * ++map_writer(void *filename) + { +- int fd; +- char buf[1024]; +- pthread_t tid; ++ int fd = -1; ++ int ret = 0; ++ char *buf = NULL; ++ int page_size = getpagesize(); ++ int op_errno = 0; ++ char *file_path = filename; + +- memset(buf, 'a', sizeof (buf)); ++ while (1) { ++ ret = access(file_path, F_OK); ++ if (ret) { ++ op_errno = errno; ++ if (op_errno == ENOENT) { ++ fd = open(file_path, O_RDWR | O_CREAT, 0777); ++ if (fd == -1) { ++ err(1, "open file failed"); ++ } + +- if (argc != 2) { +- (void) printf("usage: %s \n", argv[0]); +- exit(1); +- } ++ ret = ftruncate(fd, page_size); ++ if (ret == -1) { ++ err(1, "truncate file failed"); ++ } ++ } else { ++ err(1, "access file failed!"); ++ } ++ } else { ++ fd = open(file_path, O_RDWR, 0777); ++ if (fd == -1) { ++ err(1, "open file failed"); ++ } ++ } + +- if ((fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666)) == -1) { +- perror("open"); +- exit(1); ++ if ((buf = mmap(NULL, page_size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, fd, 0)) == MAP_FAILED) { ++ err(1, "map file failed"); ++ } ++ ++ if (fd != -1) ++ close(fd); ++ ++ char s[10] = {0, }; ++ memcpy(buf, s, 10); ++ ret = munmap(buf, page_size); ++ if (ret != 0) { ++ err(1, "unmap file failed"); ++ } + } ++} + +- (void) pthread_setconcurrency(2); +- if (pthread_create(&tid, NULL, mapper, &fd) != 0) { +- perror("pthread_create"); +- close(fd); ++int ++main(int argc, char **argv) ++{ ++ pthread_t map_write_tid; ++ pthread_t normal_write_tid[NORMAL_WRITE_TH_NUM]; ++ int i = 0; ++ ++ if (argc != 3) { ++ (void) printf("usage: %s " ++ "\n", argv[0]); + exit(1); + } +- for (;;) { +- if (write(fd, buf, sizeof (buf)) == -1) { +- perror("write"); +- close(fd); +- exit(1); ++ ++ for (i = 0; i < NORMAL_WRITE_TH_NUM; i++) { ++ if (pthread_create(&normal_write_tid[i], NULL, normal_writer, ++ argv[1])) { ++ err(1, "pthread_create normal_writer failed."); + } + } + +- close(fd); ++ if (pthread_create(&map_write_tid, NULL, map_writer, argv[2])) { ++ err(1, "pthread_create map_writer failed."); ++ } + + /* NOTREACHED */ ++ pthread_join(map_write_tid, NULL); + return (0); + } +diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh +index 1eda971041d..24150b827f8 100755 +--- a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh ++++ b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh +@@ -53,12 +53,14 @@ if ! is_mp; then + fi + + log_must chmod 777 $TESTDIR +-mmapwrite $TESTDIR/test-write-file & ++mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file & + PID_MMAPWRITE=$! +-log_note "mmapwrite $TESTDIR/test-write-file pid: $PID_MMAPWRITE" ++log_note "mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file"\ ++ "pid: $PID_MMAPWRITE" + log_must sleep 30 + + log_must kill -9 $PID_MMAPWRITE +-log_must ls -l $TESTDIR/test-write-file ++log_must ls -l $TESTDIR/normal_write_file ++log_must ls -l $TESTDIR/map_write_file + + log_pass "write(2) a mmap(2)'ing file succeeded." diff -Nru zfs-linux-0.7.12/debian/patches/series zfs-linux-0.7.12/debian/patches/series --- zfs-linux-0.7.12/debian/patches/series 2019-06-09 03:17:33.000000000 +0000 +++ zfs-linux-0.7.12/debian/patches/series 2020-04-22 05:12:46.000000000 +0000 @@ -10,3 +10,5 @@ 2000-increase-default-zcmd-allocation-to-256K.patch init-debian-openrc-workaround.patch e22bfd814960295029ca41c8e116e8d516d3e730.patch +98bb45e27ae80145a6ce028df90fccdb23f8901d.patch +01937958ce85b1cd8942dbaf9a3f9768c5b02a0a.patch