| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright (c) 2016 Trond Myklebust |
| 4 | * Copyright (c) 2019 Jeff Layton |
| 5 | * |
| 6 | * I/O and data path helper functionality. |
| 7 | * |
| 8 | * Heavily borrowed from equivalent code in fs/nfs/io.c |
| 9 | */ |
| 10 | |
| 11 | #include <linux/ceph/ceph_debug.h> |
| 12 | |
| 13 | #include <linux/types.h> |
| 14 | #include <linux/kernel.h> |
| 15 | #include <linux/rwsem.h> |
| 16 | #include <linux/fs.h> |
| 17 | |
| 18 | #include "super.h" |
| 19 | #include "io.h" |
| 20 | |
| 21 | /* Call with exclusively locked inode->i_rwsem */ |
| 22 | static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode) |
| 23 | { |
| 24 | bool is_odirect; |
| 25 | |
| 26 | lockdep_assert_held_write(&inode->i_rwsem); |
| 27 | |
| 28 | spin_lock(lock: &ci->i_ceph_lock); |
| 29 | /* ensure that bit state is consistent */ |
| 30 | smp_mb__before_atomic(); |
| 31 | is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; |
| 32 | if (is_odirect) { |
| 33 | clear_bit(CEPH_I_ODIRECT_BIT, addr: &ci->i_ceph_flags); |
| 34 | /* ensure modified bit is visible */ |
| 35 | smp_mb__after_atomic(); |
| 36 | } |
| 37 | spin_unlock(lock: &ci->i_ceph_lock); |
| 38 | |
| 39 | if (is_odirect) |
| 40 | inode_dio_wait(inode); |
| 41 | } |
| 42 | |
| 43 | /** |
| 44 | * ceph_start_io_read - declare the file is being used for buffered reads |
| 45 | * @inode: file inode |
| 46 | * |
| 47 | * Declare that a buffered read operation is about to start, and ensure |
| 48 | * that we block all direct I/O. |
| 49 | * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset, |
| 50 | * and holds a shared lock on inode->i_rwsem to ensure that the flag |
| 51 | * cannot be changed. |
| 52 | * In practice, this means that buffered read operations are allowed to |
| 53 | * execute in parallel, thanks to the shared lock, whereas direct I/O |
| 54 | * operations need to wait to grab an exclusive lock in order to set |
| 55 | * CEPH_I_ODIRECT. |
| 56 | * Note that buffered writes and truncates both take a write lock on |
| 57 | * inode->i_rwsem, meaning that those are serialised w.r.t. the reads. |
| 58 | */ |
| 59 | int ceph_start_io_read(struct inode *inode) |
| 60 | { |
| 61 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 62 | bool is_odirect; |
| 63 | int err; |
| 64 | |
| 65 | /* Be an optimist! */ |
| 66 | err = down_read_killable(sem: &inode->i_rwsem); |
| 67 | if (err) |
| 68 | return err; |
| 69 | |
| 70 | spin_lock(lock: &ci->i_ceph_lock); |
| 71 | /* ensure that bit state is consistent */ |
| 72 | smp_mb__before_atomic(); |
| 73 | is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; |
| 74 | spin_unlock(lock: &ci->i_ceph_lock); |
| 75 | if (!is_odirect) |
| 76 | return 0; |
| 77 | up_read(sem: &inode->i_rwsem); |
| 78 | |
| 79 | /* Slow path.... */ |
| 80 | err = down_write_killable(sem: &inode->i_rwsem); |
| 81 | if (err) |
| 82 | return err; |
| 83 | |
| 84 | ceph_block_o_direct(ci, inode); |
| 85 | downgrade_write(sem: &inode->i_rwsem); |
| 86 | |
| 87 | return 0; |
| 88 | } |
| 89 | |
| 90 | /** |
| 91 | * ceph_end_io_read - declare that the buffered read operation is done |
| 92 | * @inode: file inode |
| 93 | * |
| 94 | * Declare that a buffered read operation is done, and release the shared |
| 95 | * lock on inode->i_rwsem. |
| 96 | */ |
| 97 | void |
| 98 | ceph_end_io_read(struct inode *inode) |
| 99 | { |
| 100 | up_read(sem: &inode->i_rwsem); |
| 101 | } |
| 102 | |
| 103 | /** |
| 104 | * ceph_start_io_write - declare the file is being used for buffered writes |
| 105 | * @inode: file inode |
| 106 | * |
| 107 | * Declare that a buffered write operation is about to start, and ensure |
| 108 | * that we block all direct I/O. |
| 109 | */ |
| 110 | int ceph_start_io_write(struct inode *inode) |
| 111 | { |
| 112 | int err = down_write_killable(sem: &inode->i_rwsem); |
| 113 | if (!err) |
| 114 | ceph_block_o_direct(ci: ceph_inode(inode), inode); |
| 115 | return err; |
| 116 | } |
| 117 | |
| 118 | /** |
| 119 | * ceph_end_io_write - declare that the buffered write operation is done |
| 120 | * @inode: file inode |
| 121 | * |
| 122 | * Declare that a buffered write operation is done, and release the |
| 123 | * lock on inode->i_rwsem. |
| 124 | */ |
| 125 | void |
| 126 | ceph_end_io_write(struct inode *inode) |
| 127 | { |
| 128 | up_write(sem: &inode->i_rwsem); |
| 129 | } |
| 130 | |
| 131 | /* Call with exclusively locked inode->i_rwsem */ |
| 132 | static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode) |
| 133 | { |
| 134 | bool is_odirect; |
| 135 | |
| 136 | lockdep_assert_held_write(&inode->i_rwsem); |
| 137 | |
| 138 | spin_lock(lock: &ci->i_ceph_lock); |
| 139 | /* ensure that bit state is consistent */ |
| 140 | smp_mb__before_atomic(); |
| 141 | is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; |
| 142 | if (!is_odirect) { |
| 143 | set_bit(CEPH_I_ODIRECT_BIT, addr: &ci->i_ceph_flags); |
| 144 | /* ensure modified bit is visible */ |
| 145 | smp_mb__after_atomic(); |
| 146 | } |
| 147 | spin_unlock(lock: &ci->i_ceph_lock); |
| 148 | |
| 149 | if (!is_odirect) { |
| 150 | /* FIXME: unmap_mapping_range? */ |
| 151 | filemap_write_and_wait(mapping: inode->i_mapping); |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | /** |
| 156 | * ceph_start_io_direct - declare the file is being used for direct i/o |
| 157 | * @inode: file inode |
| 158 | * |
| 159 | * Declare that a direct I/O operation is about to start, and ensure |
| 160 | * that we block all buffered I/O. |
| 161 | * On exit, the function ensures that the CEPH_I_ODIRECT flag is set, |
| 162 | * and holds a shared lock on inode->i_rwsem to ensure that the flag |
| 163 | * cannot be changed. |
| 164 | * In practice, this means that direct I/O operations are allowed to |
| 165 | * execute in parallel, thanks to the shared lock, whereas buffered I/O |
| 166 | * operations need to wait to grab an exclusive lock in order to clear |
| 167 | * CEPH_I_ODIRECT. |
| 168 | * Note that buffered writes and truncates both take a write lock on |
| 169 | * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT. |
| 170 | */ |
| 171 | int ceph_start_io_direct(struct inode *inode) |
| 172 | { |
| 173 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 174 | bool is_odirect; |
| 175 | int err; |
| 176 | |
| 177 | /* Be an optimist! */ |
| 178 | err = down_read_killable(sem: &inode->i_rwsem); |
| 179 | if (err) |
| 180 | return err; |
| 181 | |
| 182 | spin_lock(lock: &ci->i_ceph_lock); |
| 183 | /* ensure that bit state is consistent */ |
| 184 | smp_mb__before_atomic(); |
| 185 | is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT; |
| 186 | spin_unlock(lock: &ci->i_ceph_lock); |
| 187 | if (is_odirect) |
| 188 | return 0; |
| 189 | up_read(sem: &inode->i_rwsem); |
| 190 | |
| 191 | /* Slow path.... */ |
| 192 | err = down_write_killable(sem: &inode->i_rwsem); |
| 193 | if (err) |
| 194 | return err; |
| 195 | |
| 196 | ceph_block_buffered(ci, inode); |
| 197 | downgrade_write(sem: &inode->i_rwsem); |
| 198 | |
| 199 | return 0; |
| 200 | } |
| 201 | |
| 202 | /** |
| 203 | * ceph_end_io_direct - declare that the direct i/o operation is done |
| 204 | * @inode: file inode |
| 205 | * |
| 206 | * Declare that a direct I/O operation is done, and release the shared |
| 207 | * lock on inode->i_rwsem. |
| 208 | */ |
| 209 | void |
| 210 | ceph_end_io_direct(struct inode *inode) |
| 211 | { |
| 212 | up_read(sem: &inode->i_rwsem); |
| 213 | } |
| 214 | |