#include <crypto/hash.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
#include <linux/uio.h>
#include <linux/magic.h>
#include <linux/iversion.h>
#include <linux/swap.h>
#include <linux/migrate.h>
#include <linux/sched/mm.h>
#include <linux/iomap.h>
#include <asm/unaligned.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "ordered-data.h"
#include "xattr.h"
#include "tree-log.h"
#include "volumes.h"
#include "compression.h"
#include "locking.h"
#include "free-space-cache.h"
#include "props.h"
#include "qgroup.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "space-info.h"
struct btrfs_iget_args {
u64 ino;
struct btrfs_root *root;
};
struct btrfs_dio_data {
u64 reserve;
loff_t length;
ssize_t submitted;
struct extent_changeset *data_reserved;
};
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
static const struct file_operations btrfs_dir_file_operations;
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
struct kmem_cache *btrfs_free_space_bitmap_cachep;
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len,
u64 ram_bytes, int compress_type,
int type);
static void __endio_write_update_ordered(struct btrfs_inode *inode,
const u64 offset, const u64 bytes,
const bool uptodate);
int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
{
if (ilock_flags & BTRFS_ILOCK_SHARED) {
if (ilock_flags & BTRFS_ILOCK_TRY) {
if (!inode_trylock_shared(inode))
return -EAGAIN;
else
return 0;
}
inode_lock_shared(inode);
} else {
if (ilock_flags & BTRFS_ILOCK_TRY) {
if (!inode_trylock(inode))
return -EAGAIN;
else
return 0;
}
inode_lock(inode);
}
return 0;
}
void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
{
if (ilock_flags & BTRFS_ILOCK_SHARED)
inode_unlock_shared(inode);
else
inode_unlock(inode);
}
static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
struct page *locked_page,
u64 offset, u64 bytes)
{
unsigned long index = offset >> PAGE_SHIFT;
unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
u64 page_start = page_offset(locked_page);
u64 page_end = page_start + PAGE_SIZE - 1;
struct page *page;
while (index <= end_index) {
page = find_get_page(inode->vfs_inode.i_mapping, index);
index++;
if (!page)
continue;
ClearPagePrivate2(page);
put_page(page);
}
if (page_start >= offset && page_end <= (offset + bytes - 1)) {
offset += PAGE_SIZE;
bytes -= PAGE_SIZE;
}
return __endio_write_update_ordered(inode, offset, bytes, false);
}
static int btrfs_dirty_inode(struct inode *inode);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
const struct qstr *qstr)
{
int err;
err = btrfs_init_acl(trans, inode, dir);
if (!err)
err = btrfs_xattr_security_init(trans, inode, dir, qstr);
return err;
}
static int insert_inline_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path, bool extent_inserted,
struct btrfs_root *root, struct inode *inode,
u64 start, size_t size, size_t compressed_size,
int compress_type,
struct page **compressed_pages)
{
struct extent_buffer *leaf;
struct page *page = NULL;
char *kaddr;
unsigned long ptr;
struct btrfs_file_extent_item *ei;
int ret;
size_t cur_size = size;
unsigned long offset;
ASSERT((compressed_size > 0 && compressed_pages) ||
(compressed_size == 0 && !compressed_pages));
if (compressed_size && compressed_pages)
cur_size = compressed_size;
if (!extent_inserted) {
struct btrfs_key key;
size_t datasize;
key.objectid = btrfs_ino(BTRFS_I(inode));
key.offset = start;
key.type = BTRFS_EXTENT_DATA_KEY;
datasize = btrfs_file_extent_calc_inline_size(cur_size);
ret = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
if (ret)
goto fail;
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_generation(leaf, ei, trans->transid);
btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
btrfs_set_file_extent_encryption(leaf, ei, 0);
btrfs_set_file_extent_other_encoding(leaf, ei, 0);
btrfs_set_file_extent_ram_bytes(leaf, ei, size);
ptr = btrfs_file_extent_inline_start(ei);
if (compress_type != BTRFS_COMPRESS_NONE) {
struct page *cpage;
int i = 0;
while (compressed_size > 0) {
cpage = compressed_pages[i];
cur_size = min_t(unsigned long, compressed_size,
PAGE_SIZE);
kaddr = kmap_atomic(cpage);
write_extent_buffer(leaf, kaddr, ptr, cur_size);
kunmap_atomic(kaddr);
i++;
ptr += cur_size;
compressed_size -= cur_size;
}
btrfs_set_file_extent_compression(leaf, ei,
compress_type);
} else {
page = find_get_page(inode->i_mapping,
start >> PAGE_SHIFT);
btrfs_set_file_extent_compression(leaf, ei, 0);
kaddr = kmap_atomic(page);
offset = offset_in_page(start);
write_extent_buffer(leaf, kaddr + offset, ptr, size);
kunmap_atomic(kaddr);
put_page(page);
}
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
size = ALIGN(size, root->fs_info->sectorsize);
ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
if (ret)
goto fail;
BTRFS_I(inode)->disk_i_size = inode->i_size;
fail:
return ret;
}
static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
u64 end, size_t compressed_size,
int compress_type,
struct page **compressed_pages)
{
struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
u64 isize = i_size_read(&inode->vfs_inode);
u64 actual_end = min(end + 1, isize);
u64 inline_len = actual_end - start;
u64 aligned_end = ALIGN(end, fs_info->sectorsize);
u64 data_len = inline_len;
int ret;
struct btrfs_path *path;
if (compressed_size)
data_len = compressed_size;
if (start > 0 ||
actual_end > fs_info->sectorsize ||
data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
(!compressed_size &&
(actual_end & (fs_info->sectorsize - 1)) == 0) ||
end + 1 < isize ||
data_len > fs_info->max_inline) {
return 1;
}
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
btrfs_free_path(path);
return PTR_ERR(trans);
}
trans->block_rsv = &inode->block_rsv;
drop_args.path = path;
drop_args.start = start;
drop_args.end = aligned_end;
drop_args.drop_cache = true;
drop_args.replace_extent = true;
if (compressed_size && compressed_pages)
drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
compressed_size);
else
drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
inline_len);
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
ret = insert_inline_extent(trans, path, drop_args.extent_inserted,
root, &inode->vfs_inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
if (ret && ret != -ENOSPC) {
btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == -ENOSPC) {
ret = 1;
goto out;
}
btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
ret = btrfs_update_inode(trans, root, inode);
if (ret && ret != -ENOSPC) {
btrfs_abort_transaction(trans, ret);
goto out;
} else if (ret == -ENOSPC) {
ret = 1;
goto out;
}
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
out:
btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
btrfs_free_path(path);
btrfs_end_transaction(trans);
return ret;
}
struct async_extent {
u64 start;
u64 ram_size;
u64 compressed_size;
struct page **pages;
unsigned long nr_pages;
int compress_type;
struct list_head list;
};
struct async_chunk {
struct inode *inode;
struct page *locked_page;
u64 start;
u64 end;
unsigned int write_flags;
struct list_head extents;
struct cgroup_subsys_state *blkcg_css;
struct btrfs_work work;
atomic_t *pending;
};
struct async_cow {
atomic_t num_chunks;
struct async_chunk chunks[];
};
static noinline int add_async_extent(struct async_chunk *cow,
u64 start, u64 ram_size,
u64 compressed_size,
struct page **pages,
unsigned long nr_pages,
int compress_type)
{
struct async_extent *async_extent;
async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
BUG_ON(!async_extent);
async_extent->start = start;
async_extent->ram_size = ram_size;
async_extent->compressed_size = compressed_size;
async_extent->pages = pages;
async_extent->nr_pages = nr_pages;
async_extent->compress_type = compress_type;
list_add_tail(&async_extent->list, &cow->extents);
return 0;
}
static inline bool inode_can_compress(struct btrfs_inode *inode)
{
if (inode->flags & BTRFS_INODE_NODATACOW ||
inode->flags & BTRFS_INODE_NODATASUM)
return false;
return true;
}
static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
u64 end)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
if (!inode_can_compress(inode)) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
btrfs_ino(inode));
return 0;
}
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
return 1;
if (inode->defrag_compress)
return 1;
if (inode->flags & BTRFS_INODE_NOCOMPRESS)
return 0;
if (btrfs_test_opt(fs_info, COMPRESS) ||
inode->flags & BTRFS_INODE_COMPRESS ||
inode->prop_compress)
return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
return 0;
}
static inline void inode_should_defrag(struct btrfs_inode *inode,
u64 start, u64 end, u64 num_bytes, u64 small_write)
{
if (num_bytes < small_write &&
(start > 0 || end + 1 < inode->disk_i_size))
btrfs_add_inode_defrag(NULL, inode);
}
static noinline int compress_file_range(struct async_chunk *async_chunk)
{
struct inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 blocksize = fs_info->sectorsize;
u64 start = async_chunk->start;
u64 end = async_chunk->end;
u64 actual_end;
u64 i_size;
int ret = 0;
struct page **pages = NULL;
unsigned long nr_pages;
unsigned long total_compressed = 0;
unsigned long total_in = 0;
int i;
int will_compress;
int compress_type = fs_info->compress_type;
int compressed_extents = 0;
int redirty = 0;
inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
SZ_16K);
barrier();
i_size = i_size_read(inode);
barrier();
actual_end = min_t(u64, i_size, end + 1);
again:
will_compress = 0;
nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
nr_pages = min_t(unsigned long, nr_pages,
BTRFS_MAX_COMPRESSED / PAGE_SIZE);
if (actual_end <= start)
goto cleanup_and_bail_uncompressed;
total_compressed = actual_end - start;
if (total_compressed <= blocksize &&
(start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
goto cleanup_and_bail_uncompressed;
total_compressed = min_t(unsigned long, total_compressed,
BTRFS_MAX_UNCOMPRESSED);
total_in = 0;
ret = 0;
if (inode_need_compress(BTRFS_I(inode), start, end)) {
WARN_ON(pages);
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
nr_pages = 0;
goto cont;
}
if (BTRFS_I(inode)->defrag_compress)
compress_type = BTRFS_I(inode)->defrag_compress;
else if (BTRFS_I(inode)->prop_compress)
compress_type = BTRFS_I(inode)->prop_compress;
if (!redirty) {
extent_range_clear_dirty_for_io(inode, start, end);
redirty = 1;
}
ret = btrfs_compress_pages(
compress_type | (fs_info->compress_level << 4),
inode->i_mapping, start,
pages,
&nr_pages,
&total_in,
&total_compressed);
if (!ret) {
unsigned long offset = offset_in_page(total_compressed);
struct page *page = pages[nr_pages - 1];
char *kaddr;
if (offset) {
kaddr = kmap_atomic(page);
memset(kaddr + offset, 0,
PAGE_SIZE - offset);
kunmap_atomic(kaddr);
}
will_compress = 1;
}
}
cont:
if (start == 0) {
if (ret || total_in < actual_end) {
ret = cow_file_range_inline(BTRFS_I(inode), start, end,
0, BTRFS_COMPRESS_NONE,
NULL);
} else {
ret = cow_file_range_inline(BTRFS_I(inode), start, end,
total_compressed,
compress_type, pages);
}
if (ret <= 0) {
unsigned long clear_flags = EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
unsigned long page_error_op;
page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
NULL,
clear_flags,
PAGE_UNLOCK |
PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
if (pages) {
for (i = 0; i < nr_pages; i++) {
WARN_ON(pages[i]->mapping);
put_page(pages[i]);
}
kfree(pages);
}
return 0;
}
}
if (will_compress) {
total_compressed = ALIGN(total_compressed, blocksize);
total_in = ALIGN(total_in, PAGE_SIZE);
if (total_compressed + blocksize <= total_in) {
compressed_extents++;
add_async_extent(async_chunk, start, total_in,
total_compressed, pages, nr_pages,
compress_type);
if (start + total_in < end) {
start += total_in;
pages = NULL;
cond_resched();
goto again;
}
return compressed_extents;
}
}
if (pages) {
for (i = 0; i < nr_pages; i++) {
WARN_ON(pages[i]->mapping);
put_page(pages[i]);
}
kfree(pages);
pages = NULL;
total_compressed = 0;
nr_pages = 0;
if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
!(BTRFS_I(inode)->prop_compress)) {
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
}
}
cleanup_and_bail_uncompressed:
if (async_chunk->locked_page &&
(page_offset(async_chunk->locked_page) >= start &&
page_offset(async_chunk->locked_page)) <= end) {
__set_page_dirty_nobuffers(async_chunk->locked_page);
}
if (redirty)
extent_range_redirty_for_io(inode, start, end);
add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
BTRFS_COMPRESS_NONE);
compressed_extents++;
return compressed_extents;
}
static void free_async_extent_pages(struct async_extent *async_extent)
{
int i;
if (!async_extent->pages)
return;
for (i = 0; i < async_extent->nr_pages; i++) {
WARN_ON(async_extent->pages[i]->mapping);
put_page(async_extent->pages[i]);
}
kfree(async_extent->pages);
async_extent->nr_pages = 0;
async_extent->pages = NULL;
}
static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
{
struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct async_extent *async_extent;
u64 alloc_hint = 0;
struct btrfs_key ins;
struct extent_map *em;
struct btrfs_root *root = inode->root;
struct extent_io_tree *io_tree = &inode->io_tree;
int ret = 0;
again:
while (!list_empty(&async_chunk->extents)) {
async_extent = list_entry(async_chunk->extents.next,
struct async_extent, list);
list_del(&async_extent->list);
retry:
lock_extent(io_tree, async_extent->start,
async_extent->start + async_extent->ram_size - 1);
if (!async_extent->pages) {
int page_started = 0;
unsigned long nr_written = 0;
ret = cow_file_range(inode, async_chunk->locked_page,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
&page_started, &nr_written, 0);
if (!page_started && !ret)
extent_write_locked_range(&inode->vfs_inode,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
WB_SYNC_ALL);
else if (ret && async_chunk->locked_page)
unlock_page(async_chunk->locked_page);
kfree(async_extent);
cond_resched();
continue;
}
ret = btrfs_reserve_extent(root, async_extent->ram_size,
async_extent->compressed_size,
async_extent->compressed_size,
0, alloc_hint, &ins, 1, 1);
if (ret) {
free_async_extent_pages(async_extent);
if (ret == -ENOSPC) {
unlock_extent(io_tree, async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
extent_range_redirty_for_io(&inode->vfs_inode,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
goto retry;
}
goto out_free;
}
em = create_io_em(inode, async_extent->start,
async_extent->ram_size,
async_extent->start,
ins.objectid,
ins.offset,
ins.offset,
async_extent->ram_size,
async_extent->compress_type,
BTRFS_ORDERED_COMPRESSED);
if (IS_ERR(em))
goto out_free_reserve;
free_extent_map(em);
ret = btrfs_add_ordered_extent_compress(inode,
async_extent->start,
ins.objectid,
async_extent->ram_size,
ins.offset,
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
if (ret) {
btrfs_drop_extent_cache(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1, 0);
goto out_free_reserve;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
if (btrfs_submit_compressed_write(inode, async_extent->start,
async_extent->ram_size,
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages,
async_chunk->write_flags,
async_chunk->blkcg_css)) {
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
const u64 end = start + async_extent->ram_size - 1;
p->mapping = inode->vfs_inode.i_mapping;
btrfs_writepage_endio_finish_ordered(p, start, end, 0);
p->mapping = NULL;
extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
PAGE_END_WRITEBACK |
PAGE_SET_ERROR);
free_async_extent_pages(async_extent);
}
alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
cond_resched();
}
return;
out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
PAGE_SET_ERROR);
free_async_extent_pages(async_extent);
kfree(async_extent);
goto again;
}
static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
u64 num_bytes)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
u64 alloc_hint = 0;
read_lock(&em_tree->lock);
em = search_extent_mapping(em_tree, start, num_bytes);
if (em) {
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
free_extent_map(em);
em = search_extent_mapping(em_tree, 0, 0);
if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
alloc_hint = em->block_start;
if (em)
free_extent_map(em);
} else {
alloc_hint = em->block_start;
free_extent_map(em);
}
}
read_unlock(&em_tree->lock);
return alloc_hint;
}
static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
unsigned clear_bits;
unsigned long page_ops;
bool extent_reserved = false;
int ret = 0;
if (btrfs_is_free_space_inode(inode)) {
WARN_ON_ONCE(1);
ret = -EINVAL;
goto out_unlock;
}
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
if (start == 0) {
ret = cow_file_range_inline(inode, start, end, 0,
BTRFS_COMPRESS_NONE, NULL);
if (ret == 0) {
extent_clear_unlock_delalloc(inode, start, end, NULL,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK);
*nr_written = *nr_written +
(end - start + PAGE_SIZE) / PAGE_SIZE;
*page_started = 1;
goto out;
} else if (ret < 0) {
goto out_unlock;
}
}
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
min_alloc_size = num_bytes;
else
min_alloc_size = fs_info->sectorsize;
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
cur_alloc_size = ins.offset;
extent_reserved = true;
ram_size = ins.offset;
em = create_io_em(inode, start, ins.offset,
start,
ins.objectid,
ins.offset,
ins.offset,
ram_size,
BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_REGULAR );
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out_reserve;
}
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
ram_size, cur_alloc_size, 0);
if (ret)
goto out_drop_extent_cache;
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, start,
cur_alloc_size);
if (ret)
btrfs_drop_extent_cache(inode, start,
start + ram_size - 1, 0);
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
page_ops = unlock ? PAGE_UNLOCK : 0;
page_ops |= PAGE_SET_PRIVATE2;
extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
if (num_bytes < cur_alloc_size)
num_bytes = 0;
else
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
extent_reserved = false;
if (ret)
goto out_unlock;
}
out:
return ret;
out_drop_extent_cache:
btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock:
clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK;
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
start + cur_alloc_size - 1,
locked_page,
clear_bits,
page_ops);
start += cur_alloc_size;
if (start >= end)
goto out;
}
extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits | EXTENT_CLEAR_DATA_RESV,
page_ops);
goto out;
}
static noinline void async_cow_start(struct btrfs_work *work)
{
struct async_chunk *async_chunk;
int compressed_extents;
async_chunk = container_of(work, struct async_chunk, work);
compressed_extents = compress_file_range(async_chunk);
if (compressed_extents == 0) {
btrfs_add_delayed_iput(async_chunk->inode);
async_chunk->inode = NULL;
}
}
static noinline void async_cow_submit(struct btrfs_work *work)
{
struct async_chunk *async_chunk = container_of(work, struct async_chunk,
work);
struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
unsigned long nr_pages;
nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
5 * SZ_1M)
cond_wake_up_nomb(&fs_info->async_submit_wait);
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
}
static noinline void async_cow_free(struct btrfs_work *work)
{
struct async_chunk *async_chunk;
async_chunk = container_of(work, struct async_chunk, work);
if (async_chunk->inode)
btrfs_add_delayed_iput(async_chunk->inode);
if (async_chunk->blkcg_css)
css_put(async_chunk->blkcg_css);
if (atomic_dec_and_test(async_chunk->pending))
kvfree(async_chunk->pending);
}
static int cow_file_range_async(struct btrfs_inode *inode,
struct writeback_control *wbc,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned long nr_pages;
u64 cur_end;
u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
int i;
bool should_compress;
unsigned nofs_flag;
const unsigned int write_flags = wbc_to_write_flags(wbc);
unlock_extent(&inode->io_tree, start, end);
if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
!btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
num_chunks = 1;
should_compress = false;
} else {
should_compress = true;
}
nofs_flag = memalloc_nofs_save();
ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
if (!ctx) {
unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
PAGE_SET_ERROR;
extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits, page_ops);
return -ENOMEM;
}
async_chunk = ctx->chunks;
atomic_set(&ctx->num_chunks, num_chunks);
for (i = 0; i < num_chunks; i++) {
if (should_compress)
cur_end = min(end, start + SZ_512K - 1);
else
cur_end = end;
ihold(&inode->vfs_inode);
async_chunk[i].pending = &ctx->num_chunks;
async_chunk[i].inode = &inode->vfs_inode;
async_chunk[i].start = start;
async_chunk[i].end = cur_end;
async_chunk[i].write_flags = write_flags;
INIT_LIST_HEAD(&async_chunk[i].extents);
if (locked_page) {
wbc_account_cgroup_owner(wbc, locked_page,
cur_end - start);
async_chunk[i].locked_page = locked_page;
locked_page = NULL;
} else {
async_chunk[i].locked_page = NULL;
}
if (blkcg_css != blkcg_root_css) {
css_get(blkcg_css);
async_chunk[i].blkcg_css = blkcg_css;
} else {
async_chunk[i].blkcg_css = NULL;
}
btrfs_init_work(&async_chunk[i].work, async_cow_start,
async_cow_submit, async_cow_free);
nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
atomic_add(nr_pages, &fs_info->async_delalloc_pages);
btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
*nr_written += nr_pages;
start = cur_end + 1;
}
*page_started = 1;
return 0;
}
static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes)
{
int ret;
struct btrfs_ordered_sum *sums;
LIST_HEAD(list);
ret = btrfs_lookup_csums_range(fs_info->csum_root, bytenr,
bytenr + num_bytes - 1, &list, 0);
if (ret == 0 && list_empty(&list))
return 0;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
list_del(&sums->list);
kfree(sums);
}
if (ret < 0)
return ret;
return 1;
}
static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
const u64 start, const u64 end,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(inode);
const bool is_reloc_ino = (inode->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &inode->io_tree;
u64 range_start = start;
u64 count;
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
if (count > 0 || is_space_ino || is_reloc_ino) {
u64 bytes = count;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
if (is_space_ino || is_reloc_ino)
bytes = range_bytes;
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
if (count > 0)
clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
0, 0, NULL);
}
return cow_file_range(inode, locked_page, start, end, page_started,
nr_written, 1);
}
static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
struct page *locked_page,
const u64 start, const u64 end,
int *page_started, int force,
unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
struct btrfs_path *path;
u64 cow_start = (u64)-1;
u64 cur_offset = start;
int ret;
bool check_prev = true;
const bool freespace_inode = btrfs_is_free_space_inode(inode);
u64 ino = btrfs_ino(inode);
bool nocow = false;
u64 disk_bytenr = 0;
path = btrfs_alloc_path();
if (!path) {
extent_clear_unlock_delalloc(inode, start, end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, PAGE_UNLOCK |
PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK);
return -ENOMEM;
}
while (1) {
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
u64 extent_end;
u64 extent_offset;
u64 num_bytes = 0;
u64 disk_num_bytes;
u64 ram_bytes;
int extent_type;
nocow = false;
ret = btrfs_lookup_file_extent(NULL, root, path, ino,
cur_offset, 0);
if (ret < 0)
goto error;
if (ret > 0 && path->slots[0] > 0 && check_prev) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key,
path->slots[0] - 1);
if (found_key.objectid == ino &&
found_key.type == BTRFS_EXTENT_DATA_KEY)
path->slots[0]--;
}
check_prev = false;
next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0) {
if (cow_start != (u64)-1)
cur_offset = cow_start;
goto error;
}
if (ret > 0)
break;
leaf = path->nodes[0];
}
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid > ino)
break;
if (WARN_ON_ONCE(found_key.objectid < ino) ||
found_key.type < BTRFS_EXTENT_DATA_KEY) {
path->slots[0]++;
goto next_slot;
}
if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
found_key.offset > end)
break;
if (found_key.offset > cur_offset) {
extent_end = found_key.offset;
extent_type = 0;
goto out_check;
}
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
extent_offset = btrfs_file_extent_offset(leaf, fi);
extent_end = found_key.offset +
btrfs_file_extent_num_bytes(leaf, fi);
disk_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf, fi);
if (extent_end <= cur_offset) {
path->slots[0]++;
goto next_slot;
}
if (disk_bytenr == 0)
goto out_check;
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
goto out_check;
if (!freespace_inode &&
btrfs_file_extent_generation(leaf, fi) <=
btrfs_root_last_snapshot(&root->root_item))
goto out_check;
if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
goto out_check;
btrfs_release_path(path);
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr, false);
if (ret) {
if (ret < 0) {
if (cow_start != (u64)-1)
cur_offset = cow_start;
goto error;
}
WARN_ON_ONCE(freespace_inode);
goto out_check;
}
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;
if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
goto out_check;
ret = csum_exist_in_range(fs_info, disk_bytenr,
num_bytes);
if (ret) {
if (ret < 0) {
if (cow_start != (u64)-1)
cur_offset = cow_start;
goto error;
}
WARN_ON_ONCE(freespace_inode);
goto out_check;
}
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
nocow = true;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset + ram_bytes;
extent_end = ALIGN(extent_end, fs_info->sectorsize);
if (extent_end <= start) {
path->slots[0]++;
goto next_slot;
}
} else {
BUG();
}
out_check:
if (!nocow) {
if (cow_start == (u64)-1)
cow_start = cur_offset;
cur_offset = extent_end;
if (cur_offset > end)
break;
if (!path->nodes[0])
continue;
path->slots[0]++;
goto next_slot;
}
if (cow_start != (u64)-1) {
ret = fallback_to_cow(inode, locked_page,
cow_start, found_key.offset - 1,
page_started, nr_written);
if (ret)
goto error;
cow_start = (u64)-1;
}
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
u64 orig_start = found_key.offset - extent_offset;
struct extent_map *em;
em = create_io_em(inode, cur_offset, num_bytes,
orig_start,
disk_bytenr,
num_bytes,
disk_num_bytes,
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto error;
}
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode, cur_offset,
disk_bytenr, num_bytes,
num_bytes,
BTRFS_ORDERED_PREALLOC);
if (ret) {
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + num_bytes - 1,
0);
goto error;
}
} else {
ret = btrfs_add_ordered_extent(inode, cur_offset,
disk_bytenr, num_bytes,
num_bytes,
BTRFS_ORDERED_NOCOW);
if (ret)
goto error;
}
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
nocow = false;
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID)
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
extent_clear_unlock_delalloc(inode, cur_offset,
cur_offset + num_bytes - 1,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC |
EXTENT_CLEAR_DATA_RESV,
PAGE_UNLOCK | PAGE_SET_PRIVATE2);
cur_offset = extent_end;
if (ret)
goto error;
if (cur_offset > end)
break;
}
btrfs_release_path(path);
if (cur_offset <= end && cow_start == (u64)-1)
cow_start = cur_offset;
if (cow_start != (u64)-1) {
cur_offset = end;
ret = fallback_to_cow(inode, locked_page, cow_start, end,
page_started, nr_written);
if (ret)
goto error;
}
error:
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
if (ret && cur_offset < end)
extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK |
PAGE_END_WRITEBACK);
btrfs_free_path(path);
return ret;
}
static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end)
{
if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
!(inode->flags & BTRFS_INODE_PREALLOC))
return 0;
if (inode->defrag_bytes &&
test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL))
return 1;
return 0;
}
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc)
{
int ret;
int force_cow = need_force_cow(inode, start, end);
if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 1, nr_written);
} else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
} else if (!inode_can_compress(inode) ||
!inode_need_compress(inode, start, end)) {
ret = cow_file_range(inode, locked_page, start, end,
page_started, nr_written, 1);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
ret = cow_file_range_async(inode, wbc, locked_page, start, end,
page_started, nr_written);
}
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
end - start + 1);
return ret;
}
void btrfs_split_delalloc_extent(struct inode *inode,
struct extent_state *orig, u64 split)
{
u64 size;
if (!(orig->state & EXTENT_DELALLOC))
return;
size = orig->end - orig->start + 1;
if (size > BTRFS_MAX_EXTENT_SIZE) {
u32 num_extents;
u64 new_size;
new_size = orig->end - split + 1;
num_extents = count_max_extents(new_size);
new_size = split - orig->start;
num_extents += count_max_extents(new_size);
if (count_max_extents(size) >= num_extents)
return;
}
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
}
void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
struct extent_state *other)
{
u64 new_size, old_size;
u32 num_extents;
if (!(other->state & EXTENT_DELALLOC))
return;
if (new->start > other->start)
new_size = new->end - other->start + 1;
else
new_size = other->end - new->start + 1;
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
spin_unlock(&BTRFS_I(inode)->lock);
return;
}
old_size = other->end - other->start + 1;
num_extents = count_max_extents(old_size);
old_size = new->end - new->start + 1;
num_extents += count_max_extents(old_size);
if (count_max_extents(new_size) >= num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
spin_unlock(&BTRFS_I(inode)->lock);
}
static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
spin_lock(&root->delalloc_lock);
if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->delalloc_inodes);
set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&BTRFS_I(inode)->runtime_flags);
root->nr_delalloc_inodes++;
if (root->nr_delalloc_inodes == 1) {
spin_lock(&fs_info->delalloc_root_lock);
BUG_ON(!list_empty(&root->delalloc_root));
list_add_tail(&root->delalloc_root,
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
}
}
spin_unlock(&root->delalloc_lock);
}
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = root->fs_info;
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&inode->runtime_flags);
root->nr_delalloc_inodes--;
if (!root->nr_delalloc_inodes) {
ASSERT(list_empty(&root->delalloc_inodes));
spin_lock(&fs_info->delalloc_root_lock);
BUG_ON(list_empty(&root->delalloc_root));
list_del_init(&root->delalloc_root);
spin_unlock(&fs_info->delalloc_root_lock);
}
}
}
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode)
{
spin_lock(&root->delalloc_lock);
__btrfs_del_delalloc_inode(root, inode);
spin_unlock(&root->delalloc_lock);
}
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
unsigned *bits)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
WARN_ON(1);
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(len);
bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
spin_unlock(&BTRFS_I(inode)->lock);
if (btrfs_is_testing(fs_info))
return;
percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes += len;
if (*bits & EXTENT_DEFRAG)
BTRFS_I(inode)->defrag_bytes += len;
if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&BTRFS_I(inode)->runtime_flags))
btrfs_add_delalloc_inodes(root, inode);
spin_unlock(&BTRFS_I(inode)->lock);
}
if (!(state->state & EXTENT_DELALLOC_NEW) &&
(*bits & EXTENT_DELALLOC_NEW)) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
state->start;
spin_unlock(&BTRFS_I(inode)->lock);
}
}
void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
struct extent_state *state, unsigned *bits)
{
struct btrfs_inode *inode = BTRFS_I(vfs_inode);
struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(len);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
spin_lock(&inode->lock);
inode->defrag_bytes -= len;
spin_unlock(&inode->lock);
}
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = inode->root;
bool do_list = !btrfs_is_free_space_inode(inode);
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, -num_extents);
spin_unlock(&inode->lock);
if (*bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len, false);
if (btrfs_is_testing(fs_info))
return;
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
do_list && !(state->state & EXTENT_NORESERVE) &&
(*bits & EXTENT_CLEAR_DATA_RESV))
btrfs_free_reserved_data_space_noquota(fs_info, len);
percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
fs_info->delalloc_batch);
spin_lock(&inode->lock);
inode->delalloc_bytes -= len;
if (do_list && inode->delalloc_bytes == 0 &&
test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&inode->runtime_flags))
btrfs_del_delalloc_inode(root, inode);
spin_unlock(&inode->lock);
}
if ((state->state & EXTENT_DELALLOC_NEW) &&
(*bits & EXTENT_DELALLOC_NEW)) {
spin_lock(&inode->lock);
ASSERT(inode->new_delalloc_bytes >= len);
inode->new_delalloc_bytes -= len;
if (*bits & EXTENT_ADD_INODE_BYTES)
inode_add_bytes(&inode->vfs_inode, len);
spin_unlock(&inode->lock);
}
}
int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
unsigned long bio_flags)
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
int ret;
struct btrfs_io_geometry geom;
if (bio_flags & EXTENT_BIO_COMPRESSED)
return 0;
length = bio->bi_iter.bi_size;
map_length = length;
ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
&geom);
if (ret < 0)
return ret;
if (geom.len < length + size)
return 1;
return 0;
}
static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
u64 dio_file_offset)
{
return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
}
blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
blk_status_t ret = 0;
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
!fs_info->csum_root;
if (btrfs_is_free_space_inode(BTRFS_I(inode)))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
if (bio_op(bio) != REQ_OP_WRITE) {
ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
if (ret)
goto out;
if (bio_flags & EXTENT_BIO_COMPRESSED) {
ret = btrfs_submit_compressed_read(inode, bio,
mirror_num,
bio_flags);
goto out;
} else {
ret = btrfs_lookup_bio_sums(inode, bio, NULL);
if (ret)
goto out;
}
goto mapit;
} else if (async && !skip_sum) {
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
goto mapit;
ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
0, btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
if (ret)
goto out;
}
mapit:
ret = btrfs_map_bio(fs_info, bio, mirror_num);
out:
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
}
return ret;
}
static int add_pending_csums(struct btrfs_trans_handle *trans,
struct list_head *list)
{
struct btrfs_ordered_sum *sum;
int ret;
list_for_each_entry(sum, list, list) {
trans->adding_csums = true;
ret = btrfs_csum_file_blocks(trans, trans->fs_info->csum_root, sum);
trans->adding_csums = false;
if (ret)
return ret;
}
return 0;
}
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
const u64 start,
const u64 len,
struct extent_state **cached_state)
{
u64 search_start = start;
const u64 end = start + len - 1;
while (search_start < end) {
const u64 search_len = end - search_start + 1;
struct extent_map *em;
u64 em_len;
int ret = 0;
em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
if (IS_ERR(em))
return PTR_ERR(em);
if (em->block_start != EXTENT_MAP_HOLE)
goto next;
em_len = em->len;
if (em->start < search_start)
em_len -= search_start - em->start;
if (em_len > search_len)
em_len = search_len;
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
GFP_NOFS, NULL);
next:
search_start = extent_map_end(em);
free_extent_map(em);
if (ret)
return ret;
}
return 0;
}
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state)
{
WARN_ON(PAGE_ALIGNED(end));
if (start >= i_size_read(&inode->vfs_inode) &&
!(inode->flags & BTRFS_INODE_PREALLOC)) {
extra_bits |= EXTENT_DELALLOC_NEW;
} else {
int ret;
ret = btrfs_find_new_delalloc_bytes(inode, start,
end + 1 - start,
cached_state);
if (ret)
return ret;
}
return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
cached_state);
}
struct btrfs_writepage_fixup {
struct page *page;
struct inode *inode;
struct btrfs_work work;
};
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
struct btrfs_writepage_fixup *fixup;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
struct page *page;
struct btrfs_inode *inode;
u64 page_start;
u64 page_end;
int ret = 0;
bool free_delalloc_space = true;
fixup = container_of(work, struct btrfs_writepage_fixup, work);
page = fixup->page;
inode = BTRFS_I(fixup->inode);
page_start = page_offset(page);
page_end = page_offset(page) + PAGE_SIZE - 1;
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
PAGE_SIZE);
again:
lock_page(page);
if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
if (!ret) {
btrfs_delalloc_release_extents(inode, PAGE_SIZE);
btrfs_delalloc_release_space(inode, data_reserved,
page_start, PAGE_SIZE,
true);
}
ret = 0;
goto out_page;
}
if (ret)
goto out_page;
lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
if (PagePrivate2(page))
goto out_reserved;
ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
if (ordered) {
unlock_extent_cached(&inode->io_tree, page_start, page_end,
&cached_state);
unlock_page(page);
btrfs_start_ordered_extent(ordered, 1);
btrfs_put_ordered_extent(ordered);
goto again;
}
ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
&cached_state);
if (ret)
goto out_reserved;
BUG_ON(!PageDirty(page));
free_delalloc_space = false;
out_reserved:
btrfs_delalloc_release_extents(inode, PAGE_SIZE);
if (free_delalloc_space)
btrfs_delalloc_release_space(inode, data_reserved, page_start,
PAGE_SIZE, true);
unlock_extent_cached(&inode->io_tree, page_start, page_end,
&cached_state);
out_page:
if (ret) {
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
clear_page_dirty_for_io(page);
SetPageError(page);
}
ClearPageChecked(page);
unlock_page(page);
put_page(page);
kfree(fixup);
extent_changeset_free(data_reserved);
btrfs_add_delayed_iput(&inode->vfs_inode);
}
int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
{
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_writepage_fixup *fixup;
if (TestClearPagePrivate2(page))
return 0;
if (PageChecked(page))
return -EAGAIN;
fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
if (!fixup)
return -EAGAIN;
ihold(inode);
SetPageChecked(page);
get_page(page);
btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
fixup->page = page;
fixup->inode = inode;
btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
return -EAGAIN;
}
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, u64 file_pos,
struct btrfs_file_extent_item *stack_fi,
const bool update_inode_bytes,
u64 qgroup_reserved)
{
struct btrfs_root *root = inode->root;
const u64 sectorsize = root->fs_info->sectorsize;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
struct btrfs_drop_extents_args drop_args = { 0 };
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
drop_args.path = path;
drop_args.start = file_pos;
drop_args.end = file_pos + num_bytes;
drop_args.replace_extent = true;
drop_args.extent_item_size = sizeof(*stack_fi);
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret)
goto out;
if (!drop_args.extent_inserted) {
ins.objectid = btrfs_ino(inode);
ins.offset = file_pos;
ins.type = BTRFS_EXTENT_DATA_KEY;
ret = btrfs_insert_empty_item(trans, root, path, &ins,
sizeof(*stack_fi));
if (ret)
goto out;
}
leaf = path->nodes[0];
btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
write_extent_buffer(leaf, stack_fi,
btrfs_item_ptr_offset(leaf, path->slots[0]),
sizeof(struct btrfs_file_extent_item));
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
u64 inline_size = round_down(drop_args.bytes_found, sectorsize);
inline_size = drop_args.bytes_found - inline_size;
btrfs_update_inode_bytes(inode, sectorsize, inline_size);
drop_args.bytes_found -= inline_size;
num_bytes -= sectorsize;
}
if (update_inode_bytes)
btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
if (ret)
goto out;
ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
file_pos, qgroup_reserved, &ins);
out:
btrfs_free_path(path);
return ret;
}
static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
u64 start, u64 len)
{
struct btrfs_block_group *cache;
cache = btrfs_lookup_block_group(fs_info, start);
ASSERT(cache);
spin_lock(&cache->lock);
cache->delalloc_bytes -= len;
spin_unlock(&cache->lock);
btrfs_put_block_group(cache);
}
static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_extent *oe)
{
struct btrfs_file_extent_item stack_fi;
u64 logical_len;
bool update_inode_bytes;
memset(&stack_fi, 0, sizeof(stack_fi));
btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
oe->disk_num_bytes);
if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
logical_len = oe->truncated_len;
else
logical_len = oe->num_bytes;
btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
oe->file_offset, &stack_fi,
update_inode_bytes, oe->qgroup_rsv);
}
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans = NULL;
struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_state *cached_state = NULL;
u64 start, end;
int compress_type = 0;
int ret = 0;
u64 logical_len = ordered_extent->num_bytes;
bool freespace_inode;
bool truncated = false;
bool clear_reserved_extent = true;
unsigned int clear_bits = EXTENT_DEFRAG;
start = ordered_extent->file_offset;
end = start + ordered_extent->num_bytes - 1;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
clear_bits |= EXTENT_DELALLOC_NEW;
freespace_inode = btrfs_is_free_space_inode(inode);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
goto out;
}
btrfs_free_io_failure_record(inode, start, end);
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
logical_len = ordered_extent->truncated_len;
if (!logical_len)
goto out;
}
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
btrfs_inode_safe_disk_i_size_write(inode, 0);
if (freespace_inode)
trans = btrfs_join_transaction_spacecache(root);
else
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
goto out;
}
trans->block_rsv = &inode->block_rsv;
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret)
btrfs_abort_transaction(trans, ret);
goto out;
}
clear_bits |= EXTENT_LOCKED;
lock_extent_bits(io_tree, start, end, &cached_state);
if (freespace_inode)
trans = btrfs_join_transaction_spacecache(root);
else
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
goto out;
}
trans->block_rsv = &inode->block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compress_type);
ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
logical_len);
} else {
BUG_ON(root == fs_info->tree_root);
ret = insert_ordered_extent_file_extent(trans, ordered_extent);
if (!ret) {
clear_reserved_extent = false;
btrfs_release_delalloc_bytes(fs_info,
ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
}
}
unpin_extent_cache(&inode->extent_tree, ordered_extent->file_offset,
ordered_extent->num_bytes, trans->transid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = add_pending_csums(trans, &ordered_extent->list);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
if ((clear_bits & EXTENT_DELALLOC_NEW) &&
!test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
clear_extent_bit(&inode->io_tree, start, end,
EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
0, 0, &cached_state);
btrfs_inode_safe_disk_i_size_write(inode, 0);
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = 0;
out:
clear_extent_bit(&inode->io_tree, start, end, clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
&cached_state);
if (trans)
btrfs_end_transaction(trans);
if (ret || truncated) {
u64 unwritten_start = start;
if (truncated)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
btrfs_drop_extent_cache(inode, unwritten_start, end, 0);
if ((ret || !logical_len) &&
clear_reserved_extent &&
!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC))
btrfs_discard_extent(fs_info,
ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes,
NULL);
btrfs_free_reserved_extent(fs_info,
ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes, 1);
}
}
btrfs_remove_ordered_extent(inode, ordered_extent);
btrfs_put_ordered_extent(ordered_extent);
btrfs_put_ordered_extent(ordered_extent);
return ret;
}
static void finish_ordered_fn(struct btrfs_work *work)
{
struct btrfs_ordered_extent *ordered_extent;
ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
btrfs_finish_ordered_io(ordered_extent);
}
void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
u64 end, int uptodate)
{
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_extent *ordered_extent = NULL;
struct btrfs_workqueue *wq;
trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
ClearPagePrivate2(page);
if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1, uptodate))
return;
if (btrfs_is_free_space_inode(inode))
wq = fs_info->endio_freespace_worker;
else
wq = fs_info->endio_write_workers;
btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
btrfs_queue_work(wq, &ordered_extent->work);
}
static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
u32 bio_offset, struct page *page, u32 pgoff)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
char *kaddr;
u32 len = fs_info->sectorsize;
const u32 csum_size = fs_info->csum_size;
unsigned int offset_sectors;
u8 *csum_expected;
u8 csum[BTRFS_CSUM_SIZE];
ASSERT(pgoff + len <= PAGE_SIZE);
offset_sectors = bio_offset >> fs_info->sectorsize_bits;
csum_expected = ((u8 *)io_bio->csum) + offset_sectors * csum_size;
kaddr = kmap_atomic(page);
shash->tfm = fs_info->csum_shash;
crypto_shash_digest(shash, kaddr + pgoff, len, csum);
if (memcmp(csum, csum_expected, csum_size))
goto zeroit;
kunmap_atomic(kaddr);
return 0;
zeroit:
btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff,
csum, csum_expected, io_bio->mirror_num);
if (io_bio->device)
btrfs_dev_stat_inc_and_print(io_bio->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
memset(kaddr + pgoff, 1, len);
flush_dcache_page(page);
kunmap_atomic(kaddr);
return -EIO;
}
int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
struct page *page, u64 start, u64 end, int mirror)
{
struct inode *inode = page->mapping->host;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
const u32 sectorsize = root->fs_info->sectorsize;
u32 pg_off;
if (PageChecked(page)) {
ClearPageChecked(page);
return 0;
}
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
return 0;
if (!root->fs_info->csum_root)
return 0;
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
return 0;
}
ASSERT(page_offset(page) <= start &&
end <= page_offset(page) + PAGE_SIZE - 1);
for (pg_off = offset_in_page(start);
pg_off < offset_in_page(end);
pg_off += sectorsize, bio_offset += sectorsize) {
int ret;
ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off);
if (ret < 0)
return -EIO;
}
return 0;
}
void btrfs_add_delayed_iput(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_inode *binode = BTRFS_I(inode);
if (atomic_add_unless(&inode->i_count, -1, 1))
return;
atomic_inc(&fs_info->nr_delayed_iputs);
spin_lock(&fs_info->delayed_iput_lock);
ASSERT(list_empty(&binode->delayed_iput));
list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
wake_up_process(fs_info->cleaner_kthread);
}
static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode)
{
list_del_init(&inode->delayed_iput);
spin_unlock(&fs_info->delayed_iput_lock);
iput(&inode->vfs_inode);
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
wake_up(&fs_info->delayed_iputs_wait);
spin_lock(&fs_info->delayed_iput_lock);
}
static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode)
{
if (!list_empty(&inode->delayed_iput)) {
spin_lock(&fs_info->delayed_iput_lock);
if (!list_empty(&inode->delayed_iput))
run_delayed_iput_locked(fs_info, inode);
spin_unlock(&fs_info->delayed_iput_lock);
}
}
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{
spin_lock(&fs_info->delayed_iput_lock);
while (!list_empty(&fs_info->delayed_iputs)) {
struct btrfs_inode *inode;
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
run_delayed_iput_locked(fs_info, inode);
}
spin_unlock(&fs_info->delayed_iput_lock);
}
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
{
int ret = wait_event_killable(fs_info->delayed_iputs_wait,
atomic_read(&fs_info->nr_delayed_iputs) == 0);
if (ret)
return -EINTR;
return 0;
}
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
int ret;
ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
if (ret && ret != -EEXIST) {
btrfs_abort_transaction(trans, ret);
return ret;
}
return 0;
}
static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
}
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
struct inode *inode;
u64 last_objectid = 0;
int ret = 0, nr_unlink = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
return 0;
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
path->reada = READA_BACK;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = (u64)-1;
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
}
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
break;
if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
break;
btrfs_release_path(path);
if (found_key.offset == last_objectid) {
btrfs_err(fs_info,
"Error removing orphan entry, stopping orphan cleanup");
ret = -EINVAL;
goto out;
}
last_objectid = found_key.offset;
found_key.objectid = found_key.offset;
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(fs_info->sb, last_objectid, root);
ret = PTR_ERR_OR_ZERO(inode);
if (ret && ret != -ENOENT)
goto out;
if (ret == -ENOENT && root == fs_info->tree_root) {
struct btrfs_root *dead_root;
int is_dead_root = 0;
spin_lock(&fs_info->fs_roots_radix_lock);
dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
(unsigned long)found_key.objectid);
if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
is_dead_root = 1;
spin_unlock(&fs_info->fs_roots_radix_lock);
if (is_dead_root) {
key.offset = found_key.objectid - 1;
continue;
}
}
if (ret == -ENOENT || inode->i_nlink) {
if (!ret)
iput(inode);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
btrfs_debug(fs_info, "auto deleting %Lu",
found_key.objectid);
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
btrfs_end_transaction(trans);
if (ret)
goto out;
continue;
}
nr_unlink++;
iput(inode);
}
btrfs_release_path(path);
root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
trans = btrfs_join_transaction(root);
if (!IS_ERR(trans))
btrfs_end_transaction(trans);
}
if (nr_unlink)
btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
out:
if (ret)
btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
btrfs_free_path(path);
return ret;
}
static noinline int acls_after_inode_item(struct extent_buffer *leaf,
int slot, u64 objectid,
int *first_xattr_slot)
{
u32 nritems = btrfs_header_nritems(leaf);
struct btrfs_key found_key;
static u64 xattr_access = 0;
static u64 xattr_default = 0;
int scanned = 0;
if (!xattr_access) {
xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
strlen(XATTR_NAME_POSIX_ACL_ACCESS));
xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
}
slot++;
*first_xattr_slot = -1;
while (slot < nritems) {
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != objectid)
return 0;
if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
if (*first_xattr_slot == -1)
*first_xattr_slot = slot;
if (found_key.offset == xattr_access ||
found_key.offset == xattr_default)
return 1;
}
if (found_key.type > BTRFS_XATTR_ITEM_KEY)
return 0;
slot++;
scanned++;
if (scanned >= 8)
break;
}
if (*first_xattr_slot == -1)
*first_xattr_slot = slot;
return 1;
}
static int btrfs_read_locked_inode(struct inode *inode,
struct btrfs_path *in_path)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_path *path = in_path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location;
unsigned long ptr;
int maybe_acls;
u32 rdev;
int ret;
bool filled = false;
int first_xattr_slot;
ret = btrfs_fill_inode(inode, &rdev);
if (!ret)
filled = true;
if (!path) {
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
}
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
if (ret) {
if (path != in_path)
btrfs_free_path(path);
return ret;
}
leaf = path->nodes[0];
if (filled)
goto cache_index;
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
inode->i_mode = btrfs_inode_mode(leaf, inode_item);
set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
round_up(i_size_read(inode), fs_info->sectorsize));
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
BTRFS_I(inode)->i_otime.tv_sec =
btrfs_timespec_sec(leaf, &inode_item->otime);
BTRFS_I(inode)->i_otime.tv_nsec =
btrfs_timespec_nsec(leaf, &inode_item->otime);
inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
inode_set_iversion_queried(inode,
btrfs_inode_sequence(leaf, inode_item));
inode->i_generation = BTRFS_I(inode)->generation;
inode->i_rdev = 0;
rdev = btrfs_inode_rdev(leaf, inode_item);
BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
cache_index:
if (BTRFS_I(inode)->last_trans == fs_info->generation)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
path->slots[0]++;
if (inode->i_nlink != 1 ||
path->slots[0] >= btrfs_header_nritems(leaf))
goto cache_acl;
btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
if (location.objectid != btrfs_ino(BTRFS_I(inode)))
goto cache_acl;
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
if (location.type == BTRFS_INODE_REF_KEY) {
struct btrfs_inode_ref *ref;
ref = (struct btrfs_inode_ref *)ptr;
BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
} else if (location.type == BTRFS_INODE_EXTREF_KEY) {
struct btrfs_inode_extref *extref;
extref = (struct btrfs_inode_extref *)ptr;
BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
extref);
}
cache_acl:
maybe_acls = acls_after_inode_item(leaf, path->slots[0],
btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
if (first_xattr_slot != -1) {
path->slots[0] = first_xattr_slot;
ret = btrfs_load_inode_props(inode, path);
if (ret)
btrfs_err(fs_info,
"error loading props for ino %llu (root %llu): %d",
btrfs_ino(BTRFS_I(inode)),
root->root_key.objectid, ret);
}
if (path != in_path)
btrfs_free_path(path);
if (!maybe_acls)
cache_no_acl(inode);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
break;
case S_IFDIR:
inode->i_fop = &btrfs_dir_file_operations;
inode->i_op = &btrfs_dir_inode_operations;
break;
case S_IFLNK:
inode->i_op = &btrfs_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_aops;
break;
default:
inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, rdev);
break;
}
btrfs_sync_inode_flags_to_i_flags(inode);
return 0;
}
static void fill_inode_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf,
struct btrfs_inode_item *item,
struct inode *inode)
{
struct btrfs_map_token token;
btrfs_init_map_token(&token, leaf);
btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size);
btrfs_set_token_inode_mode(&token, item, inode->i_mode);
btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
btrfs_set_token_timespec_sec(&token, &item->atime,
inode->i_atime.tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->atime,
inode->i_atime.tv_nsec);
btrfs_set_token_timespec_sec(&token, &item->mtime,
inode->i_mtime.tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->mtime,
inode->i_mtime.tv_nsec);
btrfs_set_token_timespec_sec(&token, &item->ctime,
inode->i_ctime.tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->ctime,
inode->i_ctime.tv_nsec);
btrfs_set_token_timespec_sec(&token, &item->otime,
BTRFS_I(inode)->i_otime.tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->otime,
BTRFS_I(inode)->i_otime.tv_nsec);
btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
btrfs_set_token_inode_generation(&token, item,
BTRFS_I(inode)->generation);
btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
btrfs_set_token_inode_transid(&token, item, trans->transid);
btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
btrfs_set_token_inode_block_group(&token, item, 0);
}
static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode)
{
struct btrfs_inode_item *inode_item;
struct btrfs_path *path;
struct extent_buffer *leaf;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_lookup_inode(trans, root, path, &inode->location, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
goto failed;
}
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
btrfs_mark_buffer_dirty(leaf);
btrfs_set_inode_last_trans(trans, inode);
ret = 0;
failed:
btrfs_free_path(path);
return ret;
}
noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
if (!btrfs_is_free_space_inode(inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
btrfs_update_root_times(trans, root);
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
btrfs_set_inode_last_trans(trans, inode);
return ret;
}
return btrfs_update_inode_item(trans, root, inode);
}
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_inode *inode)
{
int ret;
ret = btrfs_update_inode(trans, root, inode);
if (ret == -ENOSPC)
return btrfs_update_inode_item(trans, root, inode);
return ret;
}
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *dir,
struct btrfs_inode *inode,
const char *name, int name_len)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
int ret = 0;
struct btrfs_dir_item *di;
u64 index;
u64 ino = btrfs_ino(inode);
u64 dir_ino = btrfs_ino(dir);
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
name, name_len, -1);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto err;
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret)
goto err;
btrfs_release_path(path);
if (inode->dir_index) {
ret = btrfs_delayed_delete_inode_ref(inode);
if (!ret) {
index = inode->dir_index;
goto skip_backref;
}
}
ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
dir_ino, &index);
if (ret) {
btrfs_info(fs_info,
"failed to delete reference to %.*s, inode %llu parent %llu",
name_len, name, ino, dir_ino);
btrfs_abort_transaction(trans, ret);
goto err;
}
skip_backref:
ret = btrfs_delete_delayed_dir_index(trans, dir, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto err;
}
ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
dir_ino);
if (ret != 0 && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto err;
}
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
index);
if (ret == -ENOENT)
ret = 0;
else if (ret)
btrfs_abort_transaction(trans, ret);
btrfs_run_delayed_iput(fs_info, inode);
err:
btrfs_free_path(path);
if (ret)
goto out;
btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
inode_inc_iversion(&inode->vfs_inode);
inode_inc_iversion(&dir->vfs_inode);
inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
}
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len)
{
int ret;
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
drop_nlink(&inode->vfs_inode);
ret = btrfs_update_inode(trans, root, inode);
}
return ret;
}
static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
{
struct btrfs_root *root = BTRFS_I(dir)->root;
return btrfs_start_transaction_fallback_global_rsv(root, 5);
}
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
{
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
struct inode *inode = d_inode(dentry);
int ret;
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
0);
ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
BTRFS_I(d_inode(dentry)), dentry->d_name.name,
dentry->d_name.len);
if (ret)
goto out;
if (inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret)
goto out;
}
out:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(root->fs_info);
return ret;
}
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct inode *dir, struct dentry *dentry)
{
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key key;
const char *name = dentry->d_name.name;
int name_len = dentry->d_name.len;
u64 index;
int ret;
u64 objectid;
u64 dir_ino = btrfs_ino(BTRFS_I(dir));
if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
objectid = inode->root->root_key.objectid;
} else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
objectid = inode->location.objectid;
} else {
WARN_ON(1);
return -EINVAL;
}
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
name, name_len, -1);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
}
leaf = path->nodes[0];
btrfs_dir_item_key_to_cpu(leaf, di, &key);
WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
ret = btrfs_delete_one_dir_name(trans, root, path, di);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_release_path(path);
if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
di = btrfs_search_dir_index_item(root, path, dir_ino,
name, name_len);
if (IS_ERR_OR_NULL(di)) {
if (!di)
ret = -ENOENT;
else
ret = PTR_ERR(di);
btrfs_abort_transaction(trans, ret);
goto out;
}
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
index = key.offset;
btrfs_release_path(path);
} else {
ret = btrfs_del_root_ref(trans, objectid,
root->root_key.objectid, dir_ino,
&index, name, name_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
inode_inc_iversion(dir);
dir->i_mtime = dir->i_ctime = current_time(dir);
ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir));
if (ret)
btrfs_abort_transaction(trans, ret);
out:
btrfs_free_path(path);
return ret;
}
static noinline int may_destroy_subvol(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_dir_item *di;
struct btrfs_key key;
u64 dir_id;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
dir_id, "default", 7, 0);
if (di && !IS_ERR(di)) {
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
if (key.objectid == root->root_key.objectid) {
ret = -EPERM;
btrfs_err(fs_info,
"deleting default subvolume %llu is not allowed",
key.objectid);
goto out;
}
btrfs_release_path(path);
}
key.objectid = root->root_key.objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret < 0)
goto out;
BUG_ON(ret == 0);
ret = 0;
if (path->slots[0] > 0) {
path->slots[0]--;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.objectid == root->root_key.objectid &&
key.type == BTRFS_ROOT_REF_KEY)
ret = -ENOTEMPTY;
}
out:
btrfs_free_path(path);
return ret;
}
static void btrfs_prune_dentries(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *node;
struct rb_node *prev;
struct btrfs_inode *entry;
struct inode *inode;
u64 objectid = 0;
if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
WARN_ON(btrfs_root_refs(&root->root_item) != 0);
spin_lock(&root->inode_lock);
again:
node = root->inode_tree.rb_node;
prev = NULL;
while (node) {
prev = node;
entry = rb_entry(node, struct btrfs_inode, rb_node);
if (objectid < btrfs_ino(entry))
node = node->rb_left;
else if (objectid > btrfs_ino(entry))
node = node->rb_right;
else
break;
}
if (!node) {
while (prev) {
entry = rb_entry(prev, struct btrfs_inode, rb_node);
if (objectid <= btrfs_ino(entry)) {
node = prev;
break;
}
prev = rb_next(prev);
}
}
while (node) {
entry = rb_entry(node, struct btrfs_inode, rb_node);
objectid = btrfs_ino(entry) + 1;
inode = igrab(&entry->vfs_inode);
if (inode) {
spin_unlock(&root->inode_lock);
if (atomic_read(&inode->i_count) > 1)
d_prune_aliases(inode);
iput(inode);
cond_resched();
spin_lock(&root->inode_lock);
goto again;
}
if (cond_resched_lock(&root->inode_lock))
goto again;
node = rb_next(node);
}
spin_unlock(&root->inode_lock);
}
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = d_inode(dentry);
struct btrfs_root *dest = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv block_rsv;
u64 root_flags;
int ret;
spin_lock(&dest->root_item_lock);
if (dest->send_in_progress) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
return -EPERM;
}
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
down_write(&fs_info->subvol_sem);
ret = may_destroy_subvol(dest);
if (ret)
goto out_up_write;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
goto out_up_write;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_release;
}
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
ret = btrfs_unlink_subvol(trans, dir, dentry);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
btrfs_record_root_in_trans(trans, dest);
memset(&dest->root_item.drop_progress, 0,
sizeof(dest->root_item.drop_progress));
btrfs_set_root_drop_level(&dest->root_item, 0);
btrfs_set_root_refs(&dest->root_item, 0);
if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
ret = btrfs_insert_orphan_item(trans,
fs_info->tree_root,
dest->root_key.objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
}
ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
BTRFS_UUID_KEY_SUBVOL,
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
ret = btrfs_uuid_tree_remove(trans,
dest->root_item.received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
}
free_anon_bdev(dest->anon_dev);
dest->anon_dev = 0;
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
ret = btrfs_end_transaction(trans);
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
out_up_write:
up_write(&fs_info->subvol_sem);
if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
} else {
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
}
return ret;
}
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
u64 last_unlink_trans;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
return btrfs_delete_subvolume(dir, dentry);
trans = __unlink_start_trans(dir);
if (IS_ERR(trans))
return PTR_ERR(trans);
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
err = btrfs_unlink_subvol(trans, dir, dentry);
goto out;
}
err = btrfs_orphan_add(trans, BTRFS_I(inode));
if (err)
goto out;
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
BTRFS_I(d_inode(dentry)), dentry->d_name.name,
dentry->d_name.len);
if (!err) {
btrfs_i_size_write(BTRFS_I(inode), 0);
if (last_unlink_trans >= trans->transid)
BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
}
out:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(root->fs_info);
return err;
}
#define NEED_TRUNCATE_BLOCK 1
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
u64 new_size, u32 min_type)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
struct btrfs_key found_key;
u64 extent_start = 0;
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
u64 last_size = new_size;
u32 found_type = (u8)-1;
int found_extent;
int del_item;
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
int ret;
u64 ino = btrfs_ino(inode);
u64 bytes_deleted = 0;
bool be_nice = false;
bool should_throttle = false;
const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
struct extent_state *cached_state = NULL;
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
if (!btrfs_is_free_space_inode(inode) &&
test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
be_nice = true;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->reada = READA_BACK;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
lock_extent_bits(&inode->io_tree, lock_start, (u64)-1,
&cached_state);
btrfs_drop_extent_cache(inode, ALIGN(new_size,
fs_info->sectorsize),
(u64)-1, 0);
}
if (min_type == 0 && root == inode->root)
btrfs_kill_delayed_inode_items(inode);
key.objectid = ino;
key.offset = (u64)-1;
key.type = (u8)-1;
search_again:
if (be_nice && bytes_deleted > SZ_32M &&
btrfs_should_end_transaction(trans)) {
ret = -EAGAIN;
goto out;
}
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
if (ret > 0) {
ret = 0;
if (path->slots[0] == 0)
goto out;
path->slots[0]--;
}
while (1) {
u64 clear_start = 0, clear_len = 0;
fi = NULL;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
found_type = found_key.type;
if (found_key.objectid != ino)
break;
if (found_type < min_type)
break;
item_end = found_key.offset;
if (found_type == BTRFS_EXTENT_DATA_KEY) {
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
item_end +=
btrfs_file_extent_num_bytes(leaf, fi);
trace_btrfs_truncate_show_fi_regular(
inode, leaf, fi, found_key.offset);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
item_end += btrfs_file_extent_ram_bytes(leaf,
fi);
trace_btrfs_truncate_show_fi_inline(
inode, leaf, fi, path->slots[0],
found_key.offset);
}
item_end--;
}
if (found_type > min_type) {
del_item = 1;
} else {
if (item_end < new_size)
break;
if (found_key.offset >= new_size)
del_item = 1;
else
del_item = 0;
}
found_extent = 0;
if (found_type != BTRFS_EXTENT_DATA_KEY)
goto delete;
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
u64 num_dec;
clear_start = found_key.offset;
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
if (!del_item) {
u64 orig_num_bytes =
btrfs_file_extent_num_bytes(leaf, fi);
extent_num_bytes = ALIGN(new_size -
found_key.offset,
fs_info->sectorsize);
clear_start = ALIGN(new_size, fs_info->sectorsize);
btrfs_set_file_extent_num_bytes(leaf, fi,
extent_num_bytes);
num_dec = (orig_num_bytes -
extent_num_bytes);
if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state) &&
extent_start != 0)
inode_sub_bytes(&inode->vfs_inode,
num_dec);
btrfs_mark_buffer_dirty(leaf);
} else {
extent_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf,
fi);
extent_offset = found_key.offset -
btrfs_file_extent_offset(leaf, fi);
num_dec = btrfs_file_extent_num_bytes(leaf, fi);
if (extent_start != 0) {
found_extent = 1;
if (test_bit(BTRFS_ROOT_SHAREABLE,
&root->state))
inode_sub_bytes(&inode->vfs_inode,
num_dec);
}
}
clear_len = num_dec;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
if (!del_item &&
btrfs_file_extent_encryption(leaf, fi) == 0 &&
btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
btrfs_file_extent_compression(leaf, fi) == 0) {
u32 size = (u32)(new_size - found_key.offset);
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
size = btrfs_file_extent_calc_inline_size(size);
btrfs_truncate_item(path, size, 1);
} else if (!del_item) {
ret = NEED_TRUNCATE_BLOCK;
break;
} else {
clear_len = fs_info->sectorsize;
}
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
inode_sub_bytes(&inode->vfs_inode,
item_end + 1 - new_size);
}
delete:
if (root == inode->root) {
ret = btrfs_inode_clear_file_extent_range(inode,
clear_start, clear_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
}
if (del_item)
last_size = found_key.offset;
else
last_size = new_size;
if (del_item) {
if (!pending_del_nr) {
pending_del_slot = path->slots[0];
pending_del_nr = 1;
} else if (pending_del_nr &&
path->slots[0] + 1 == pending_del_slot) {
pending_del_nr++;
pending_del_slot = path->slots[0];
} else {
BUG();
}
} else {
break;
}
should_throttle = false;
if (found_extent &&
root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_ref ref = { 0 };
bytes_deleted += extent_num_bytes;
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
extent_start, extent_num_bytes, 0);
ref.real_root = root->root_key.objectid;
btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
ino, extent_offset);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
if (be_nice) {
if (btrfs_should_throttle_delayed_refs(trans))
should_throttle = true;
}
}
if (found_type == BTRFS_INODE_ITEM_KEY)
break;
if (path->slots[0] == 0 ||
path->slots[0] != pending_del_slot ||
should_throttle) {
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
pending_del_nr);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
pending_del_nr = 0;
}
btrfs_release_path(path);
if (should_throttle) {
ret = btrfs_delayed_refs_rsv_refill(fs_info,
BTRFS_RESERVE_NO_FLUSH);
if (ret) {
ret = -EAGAIN;
break;
}
}
goto search_again;
} else {
path->slots[0]--;
}
}
out:
if (ret >= 0 && pending_del_nr) {
int err;
err = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
if (err) {
btrfs_abort_transaction(trans, err);
ret = err;
}
}
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ASSERT(last_size >= new_size);
if (!ret && last_size > new_size)
last_size = new_size;
btrfs_inode_safe_disk_i_size_write(inode, last_size);
unlock_extent_cached(&inode->io_tree, lock_start, (u64)-1,
&cached_state);
}
btrfs_free_path(path);
return ret;
}
int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
int front)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct address_space *mapping = inode->vfs_inode.i_mapping;
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
char *kaddr;
bool only_release_metadata = false;
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (blocksize - 1);
struct page *page;
gfp_t mask = btrfs_alloc_write_mask(mapping);
size_t write_bytes = blocksize;
int ret = 0;
u64 block_start;
u64 block_end;
if (IS_ALIGNED(offset, blocksize) &&
(!len || IS_ALIGNED(len, blocksize)))
goto out;
block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1;
ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
blocksize);
if (ret < 0) {
if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
only_release_metadata = true;
} else {
goto out;
}
}
ret = btrfs_delalloc_reserve_metadata(inode, blocksize);
if (ret < 0) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode, data_reserved,
block_start, blocksize);
goto out;
}
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize, true);
btrfs_delalloc_release_extents(inode, blocksize);
ret = -ENOMEM;
goto out;
}
if (!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
lock_page(page);
if (page->mapping != mapping) {
unlock_page(page);
put_page(page);
goto again;
}
if (!PageUptodate(page)) {
ret = -EIO;
goto out_unlock;
}
}
wait_on_page_writeback(page);
lock_extent_bits(io_tree, block_start, block_end, &cached_state);
set_page_extent_mapped(page);
ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state);
unlock_page(page);
put_page(page);
btrfs_start_ordered_extent(ordered, 1);
btrfs_put_ordered_extent(ordered);
goto again;
}
clear_extent_bit(&inode->io_tree, block_start, block_end,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state);
ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
&cached_state);
if (ret) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state);
goto out_unlock;
}
if (offset != blocksize) {
if (!len)
len = blocksize - offset;
kaddr = kmap(page);
if (front)
memset(kaddr + (block_start - page_offset(page)),
0, offset);
else
memset(kaddr + (block_start - page_offset(page)) + offset,
0, len);
flush_dcache_page(page);
kunmap(page);
}
ClearPageChecked(page);
set_page_dirty(page);
unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
if (only_release_metadata)
set_extent_bit(&inode->io_tree, block_start, block_end,
EXTENT_NORESERVE, 0, NULL, NULL, GFP_NOFS, NULL);
out_unlock:
if (ret) {
if (only_release_metadata)
btrfs_delalloc_release_metadata(inode, blocksize, true);
else
btrfs_delalloc_release_space(inode, data_reserved,
block_start, blocksize, true);
}
btrfs_delalloc_release_extents(inode, blocksize);
unlock_page(page);
put_page(page);
out:
if (only_release_metadata)
btrfs_check_nocow_unlock(inode);
extent_changeset_free(data_reserved);
return ret;
}
static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
u64 offset, u64 len)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_drop_extents_args drop_args = { 0 };
int ret;
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
inode->last_trans = fs_info->generation;
inode->last_sub_trans = root->log_transid;
inode->last_log_commit = root->last_log_commit;
return 0;
}
trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans))
return PTR_ERR(trans);
drop_args.start = offset;
drop_args.end = offset + len;
drop_args.drop_cache = true;
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
offset, 0, 0, len, 0, len, 0, 0, 0);
if (ret) {
btrfs_abort_transaction(trans, ret);
} else {
btrfs_update_inode_bytes(inode, 0, drop_args.bytes_found);
btrfs_update_inode(trans, root, inode);
}
btrfs_end_transaction(trans);
return ret;
}
int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct extent_map_tree *em_tree = &inode->extent_tree;
u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
u64 block_end = ALIGN(size, fs_info->sectorsize);
u64 last_byte;
u64 cur_offset;
u64 hole_size;
int err = 0;
err = btrfs_truncate_block(inode, oldsize, 0, 0);
if (err)
return err;
if (size <= hole_start)
return 0;
btrfs_lock_and_flush_ordered_range(inode, hole_start, block_end - 1,
&cached_state);
cur_offset = hole_start;
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
block_end - cur_offset);
if (IS_ERR(em)) {
err = PTR_ERR(em);
em = NULL;
break;
}
last_byte = min(extent_map_end(em), block_end);
last_byte = ALIGN(last_byte, fs_info->sectorsize);
hole_size = last_byte - cur_offset;
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
struct extent_map *hole_em;
err = maybe_insert_hole(root, inode, cur_offset,
hole_size);
if (err)
break;
err = btrfs_inode_set_file_extent_range(inode,
cur_offset, hole_size);
if (err)
break;
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
if (!hole_em) {
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&inode->runtime_flags);
goto next;
}
hole_em->start = cur_offset;
hole_em->len = hole_size;
hole_em->orig_start = cur_offset;
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
hole_em->ram_bytes = hole_size;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = fs_info->generation;
while (1) {
write_lock(&em_tree->lock);
err = add_extent_mapping(em_tree, hole_em, 1);
write_unlock(&em_tree->lock);
if (err != -EEXIST)
break;
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset +
hole_size - 1, 0);
}
free_extent_map(hole_em);
} else {
err = btrfs_inode_set_file_extent_range(inode,
cur_offset, hole_size);
if (err)
break;
}
next:
free_extent_map(em);
em = NULL;
cur_offset = last_byte;
if (cur_offset >= block_end)
break;
}
free_extent_map(em);
unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
return err;
}
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
loff_t oldsize = i_size_read(inode);
loff_t newsize = attr->ia_size;
int mask = attr->ia_valid;
int ret;
if (newsize != oldsize) {
inode_inc_iversion(inode);
if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
inode->i_ctime = inode->i_mtime =
current_time(inode);
}
if (newsize > oldsize) {
btrfs_drew_write_lock(&root->snapshot_lock);
ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, newsize);
if (ret) {
btrfs_drew_write_unlock(&root->snapshot_lock);
return ret;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_drew_write_unlock(&root->snapshot_lock);
return PTR_ERR(trans);
}
i_size_write(inode, newsize);
btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
pagecache_isize_extended(inode, oldsize, newsize);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_end_transaction(trans);
} else {
if (newsize == 0)
set_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
&BTRFS_I(inode)->runtime_flags);
truncate_setsize(inode, newsize);
inode_dio_wait(inode);
ret = btrfs_truncate(inode, newsize == oldsize);
if (ret && inode->i_nlink) {
int err;
err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (err)
return err;
i_size_write(inode, BTRFS_I(inode)->disk_i_size);
}
}
return ret;
}
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
int err;
if (btrfs_root_readonly(root))
return -EROFS;
err = setattr_prepare(dentry, attr);
if (err)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
err = btrfs_setsize(inode, attr);
if (err)
return err;
}
if (attr->ia_valid) {
setattr_copy(inode, attr);
inode_inc_iversion(inode);
err = btrfs_dirty_inode(inode);
if (!err && attr->ia_valid & ATTR_MODE)
err = posix_acl_chmod(inode, inode->i_mode);
}
return err;
}
static void evict_inode_truncate_pages(struct inode *inode)
{
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
struct rb_node *node;
ASSERT(inode->i_state & I_FREEING);
truncate_inode_pages_final(&inode->i_data);
write_lock(&map_tree->lock);
while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
struct extent_map *em;
node = rb_first_cached(&map_tree->map);
em = rb_entry(node, struct extent_map, rb_node);
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
remove_extent_mapping(map_tree, em);
free_extent_map(em);
if (need_resched()) {
write_unlock(&map_tree->lock);
cond_resched();
write_lock(&map_tree->lock);
}
}
write_unlock(&map_tree->lock);
spin_lock(&io_tree->lock);
while (!RB_EMPTY_ROOT(&io_tree->state)) {
struct extent_state *state;
struct extent_state *cached_state = NULL;
u64 start;
u64 end;
unsigned state_flags;
node = rb_first(&io_tree->state);
state = rb_entry(node, struct extent_state, rb_node);
start = state->start;
end = state->end;
state_flags = state->state;
spin_unlock(&io_tree->lock);
lock_extent_bits(io_tree, start, end, &cached_state);
if (state_flags & EXTENT_DELALLOC)
btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
end - start + 1);
clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
&cached_state);
cond_resched();
spin_lock(&io_tree->lock);
}
spin_unlock(&io_tree->lock);
}
static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
struct btrfs_block_rsv *rsv)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_trans_handle *trans;
u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1);
int ret;
ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra,
BTRFS_RESERVE_FLUSH_EVICT);
if (ret) {
if (btrfs_check_space_for_delayed_refs(fs_info) ||
btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) {
btrfs_warn(fs_info,
"could not allocate space for delete; will truncate on mount");
return ERR_PTR(-ENOSPC);
}
delayed_refs_extra = 0;
}
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return trans;
if (delayed_refs_extra) {
trans->block_rsv = &fs_info->trans_block_rsv;
trans->bytes_reserved = delayed_refs_extra;
btrfs_block_rsv_migrate(rsv, trans->block_rsv,
delayed_refs_extra, 1);
}
return trans;
}
void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv;
int ret;
trace_btrfs_inode_evict(inode);
if (!root) {
clear_inode(inode);
return;
}
evict_inode_truncate_pages(inode);
if (inode->i_nlink &&
((btrfs_root_refs(&root->root_item) != 0 &&
root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
btrfs_is_free_space_inode(BTRFS_I(inode))))
goto no_delete;
if (is_bad_inode(inode))
goto no_delete;
btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
goto no_delete;
if (inode->i_nlink > 0) {
BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
goto no_delete;
}
ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
if (ret)
goto no_delete;
rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
if (!rsv)
goto no_delete;
rsv->size = btrfs_calc_metadata_size(fs_info, 1);
rsv->failfast = 1;
btrfs_i_size_write(BTRFS_I(inode), 0);
while (1) {
trans = evict_refill_and_join(root, rsv);
if (IS_ERR(trans))
goto free_rsv;
trans->block_rsv = rsv;
ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
0, 0);
trans->block_rsv = &fs_info->trans_block_rsv;
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
if (ret && ret != -ENOSPC && ret != -EAGAIN)
goto free_rsv;
else if (!ret)
break;
}
trans = evict_refill_and_join(root, rsv);
if (!IS_ERR(trans)) {
trans->block_rsv = rsv;
btrfs_orphan_del(trans, BTRFS_I(inode));
trans->block_rsv = &fs_info->trans_block_rsv;
btrfs_end_transaction(trans);
}
free_rsv:
btrfs_free_block_rsv(fs_info, rsv);
no_delete:
btrfs_remove_delayed_node(BTRFS_I(inode));
clear_inode(inode);
}
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
struct btrfs_key *location, u8 *type)
{
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
struct btrfs_dir_item *di;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(dir)->root;
int ret = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
name, namelen, 0);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
if (location->type != BTRFS_INODE_ITEM_KEY &&
location->type != BTRFS_ROOT_ITEM_KEY) {
ret = -EUCLEAN;
btrfs_warn(root->fs_info,
"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
__func__, name, btrfs_ino(BTRFS_I(dir)),
location->objectid, location->type, location->offset);
}
if (!ret)
*type = btrfs_dir_type(path->nodes[0], di);
out:
btrfs_free_path(path);
return ret;
}
static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
struct inode *dir,
struct dentry *dentry,
struct btrfs_key *location,
struct btrfs_root **sub_root)
{
struct btrfs_path *path;
struct btrfs_root *new_root;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;
int err = 0;
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
goto out;
}
err = -ENOENT;
key.objectid = BTRFS_I(dir)->root->root_key.objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = location->objectid;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
if (ret) {
if (ret < 0)
err = ret;
goto out;
}
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
goto out;
ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
(unsigned long)(ref + 1),
dentry->d_name.len);
if (ret)
goto out;
btrfs_release_path(path);
new_root = btrfs_get_fs_root(fs_info, location->objectid, true);
if (IS_ERR(new_root)) {
err = PTR_ERR(new_root);
goto out;
}
*sub_root = new_root;
location->objectid = btrfs_root_dirid(&new_root->root_item);
location->type = BTRFS_INODE_ITEM_KEY;
location->offset = 0;
err = 0;
out:
btrfs_free_path(path);
return err;
}
static void inode_tree_add(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_inode *entry;
struct rb_node **p;
struct rb_node *parent;
struct rb_node *new = &BTRFS_I(inode)->rb_node;
u64 ino = btrfs_ino(BTRFS_I(inode));
if (inode_unhashed(inode))
return;
parent = NULL;
spin_lock(&root->inode_lock);
p = &root->inode_tree.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct btrfs_inode, rb_node);
if (ino < btrfs_ino(entry))
p = &parent->rb_left;
else if (ino > btrfs_ino(entry))
p = &parent->rb_right;
else {
WARN_ON(!(entry->vfs_inode.i_state &
(I_WILL_FREE | I_FREEING)));
rb_replace_node(parent, new, &root->inode_tree);
RB_CLEAR_NODE(parent);
spin_unlock(&root->inode_lock);
return;
}
}
rb_link_node(new, parent, p);
rb_insert_color(new, &root->inode_tree);
spin_unlock(&root->inode_lock);
}
static void inode_tree_del(struct btrfs_inode *inode)
{
struct btrfs_root *root = inode->root;
int empty = 0;
spin_lock(&root->inode_lock);
if (!RB_EMPTY_NODE(&inode->rb_node)) {
rb_erase(&inode->rb_node, &root->inode_tree);
RB_CLEAR_NODE(&inode->rb_node);
empty = RB_EMPTY_ROOT(&root->inode_tree);
}
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
spin_unlock(&root->inode_lock);
if (empty)
btrfs_add_dead_root(root);
}
}
static int btrfs_init_locked_inode(struct inode *inode, void *p)
{
struct btrfs_iget_args *args = p;
inode->i_ino = args->ino;
BTRFS_I(inode)->location.objectid = args->ino;
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.offset = 0;
BTRFS_I(inode)->root = btrfs_grab_root(args->root);
BUG_ON(args->root && !BTRFS_I(inode)->root);
return 0;
}
static int btrfs_find_actor(struct inode *inode, void *opaque)
{
struct btrfs_iget_args *args = opaque;
return args->ino == BTRFS_I(inode)->location.objectid &&
args->root == BTRFS_I(inode)->root;
}
static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
struct btrfs_root *root)
{
struct inode *inode;
struct btrfs_iget_args args;
unsigned long hashval = btrfs_inode_hash(ino, root);
args.ino = ino;
args.root = root;
inode = iget5_locked(s, hashval, btrfs_find_actor,
btrfs_init_locked_inode,
(void *)&args);
return inode;
}
struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
struct btrfs_root *root, struct btrfs_path *path)
{
struct inode *inode;
inode = btrfs_iget_locked(s, ino, root);
if (!inode)
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
int ret;
ret = btrfs_read_locked_inode(inode, path);
if (!ret) {
inode_tree_add(inode);
unlock_new_inode(inode);
} else {
iget_failed(inode);
if (ret > 0)
ret = -ENOENT;
inode = ERR_PTR(ret);
}
}
return inode;
}
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root)
{
return btrfs_iget_path(s, ino, root, NULL);
}
static struct inode *new_simple_dir(struct super_block *s,
struct btrfs_key *key,
struct btrfs_root *root)
{
struct inode *inode = new_inode(s);
if (!inode)
return ERR_PTR(-ENOMEM);
BTRFS_I(inode)->root = btrfs_grab_root(root);
memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
inode->i_op = &simple_dir_inode_operations;
inode->i_opflags &= ~IOP_XATTR;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = current_time(inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
return inode;
}
static inline u8 btrfs_inode_type(struct inode *inode)
{
BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
return fs_umode_to_ftype(inode->i_mode);
}
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
u8 di_type = 0;
int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
if (ret < 0)
return ERR_PTR(ret);
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, location.objectid, root);
if (IS_ERR(inode))
return inode;
if (btrfs_inode_type(inode) != di_type) {
btrfs_crit(fs_info,
"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
inode->i_mode, btrfs_inode_type(inode),
di_type);
iput(inode);
return ERR_PTR(-EUCLEAN);
}
return inode;
}
ret = fixup_tree_root_location(fs_info, dir, dentry,
&location, &sub_root);
if (ret < 0) {
if (ret != -ENOENT)
inode = ERR_PTR(ret);
else
inode = new_simple_dir(dir->i_sb, &location, sub_root);
} else {
inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
}
if (root != sub_root)
btrfs_put_root(sub_root);
if (!IS_ERR(inode) && root != sub_root) {
down_read(&fs_info->cleanup_work_sem);
if (!sb_rdonly(inode->i_sb))
ret = btrfs_orphan_cleanup(sub_root);
up_read(&fs_info->cleanup_work_sem);
if (ret) {
iput(inode);
inode = ERR_PTR(ret);
}
}
return inode;
}
static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
struct inode *inode = d_inode(dentry);
if (!inode && !IS_ROOT(dentry))
inode = d_inode(dentry->d_parent);
if (inode) {
root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return 1;
}
return 0;
}
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct inode *inode = btrfs_lookup_dentry(dir, dentry);
if (inode == ERR_PTR(-ENOENT))
inode = NULL;
return d_splice_alias(inode, dentry);
}
static int btrfs_opendir(struct inode *inode, struct file *file)
{
struct btrfs_file_private *private;
private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
if (!private)
return -ENOMEM;
private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!private->filldir_buf) {
kfree(private);
return -ENOMEM;
}
file->private_data = private;
return 0;
}
struct dir_entry {
u64 ino;
u64 offset;
unsigned type;
int name_len;
};
static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
{
while (entries--) {
struct dir_entry *entry = addr;
char *name = (char *)(entry + 1);
ctx->pos = get_unaligned(&entry->offset);
if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
get_unaligned(&entry->ino),
get_unaligned(&entry->type)))
return 1;
addr += sizeof(struct dir_entry) +
get_unaligned(&entry->name_len);
ctx->pos++;
}
return 0;
}
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_file_private *private = file->private_data;
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_path *path;
void *addr;
struct list_head ins_list;
struct list_head del_list;
int ret;
struct extent_buffer *leaf;
int slot;
char *name_ptr;
int name_len;
int entries = 0;
int total_len = 0;
bool put = false;
struct btrfs_key location;
if (!dir_emit_dots(file, ctx))
return 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
addr = private->filldir_buf;
path->reada = READA_FORWARD;
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
again:
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = ctx->pos;
key.objectid = btrfs_ino(BTRFS_I(inode));
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto err;
while (1) {
struct dir_entry *entry;
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto err;
else if (ret > 0)
break;
continue;
}
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != key.objectid)
break;
if (found_key.type != BTRFS_DIR_INDEX_KEY)
break;
if (found_key.offset < ctx->pos)
goto next;
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
name_len = btrfs_dir_name_len(leaf, di);
if ((total_len + sizeof(struct dir_entry) + name_len) >=
PAGE_SIZE) {
btrfs_release_path(path);
ret = btrfs_filldir(private->filldir_buf, entries, ctx);
if (ret)
goto nopos;
addr = private->filldir_buf;
entries = 0;
total_len = 0;
goto again;
}
entry = addr;
put_unaligned(name_len, &entry->name_len);
name_ptr = (char *)(entry + 1);
read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
name_len);
put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
&entry->type);
btrfs_dir_item_key_to_cpu(leaf, di, &location);
put_unaligned(location.objectid, &entry->ino);
put_unaligned(found_key.offset, &entry->offset);
entries++;
addr += sizeof(struct dir_entry) + name_len;
total_len += sizeof(struct dir_entry) + name_len;
next:
path->slots[0]++;
}
btrfs_release_path(path);
ret = btrfs_filldir(private->filldir_buf, entries, ctx);
if (ret)
goto nopos;
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
if (ret)
goto nopos;
if (ctx->pos >= INT_MAX)
ctx->pos = LLONG_MAX;
else
ctx->pos = INT_MAX;
nopos:
ret = 0;
err:
if (put)
btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
btrfs_free_path(path);
return ret;
}
static int btrfs_dirty_inode(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret;
if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
return 0;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret && ret == -ENOSPC) {
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
}
btrfs_end_transaction(trans);
if (BTRFS_I(inode)->delayed_node)
btrfs_balance_delayed_items(fs_info);
return ret;
}
static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
int flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
bool dirty = flags & ~S_VERSION;
if (btrfs_root_readonly(root))
return -EROFS;
if (flags & S_VERSION)
dirty |= inode_maybe_inc_iversion(inode, dirty);
if (flags & S_CTIME)
inode->i_ctime = *now;
if (flags & S_MTIME)
inode->i_mtime = *now;
if (flags & S_ATIME)
inode->i_atime = *now;
return dirty ? btrfs_dirty_inode(inode) : 0;
}
static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
{
struct btrfs_root *root = inode->root;
struct btrfs_key key, found_key;
struct btrfs_path *path;
struct extent_buffer *leaf;
int ret;
key.objectid = btrfs_ino(inode);
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = (u64)-1;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
if (ret == 0)
goto out;
ret = 0;
if (path->slots[0] == 0) {
inode->index_cnt = 2;
goto out;
}
path->slots[0]--;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != btrfs_ino(inode) ||
found_key.type != BTRFS_DIR_INDEX_KEY) {
inode->index_cnt = 2;
goto out;
}
inode->index_cnt = found_key.offset + 1;
out:
btrfs_free_path(path);
return ret;
}
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
{
int ret = 0;
if (dir->index_cnt == (u64)-1) {
ret = btrfs_inode_delayed_dir_index_count(dir);
if (ret) {
ret = btrfs_set_inode_index_count(dir);
if (ret)
return ret;
}
}
*index = dir->index_cnt;
dir->index_cnt++;
return ret;
}
static int btrfs_insert_inode_locked(struct inode *inode)
{
struct btrfs_iget_args args;
args.ino = BTRFS_I(inode)->location.objectid;
args.root = BTRFS_I(inode)->root;
return insert_inode_locked4(inode,
btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
btrfs_find_actor, &args);
}
static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
{
unsigned int flags;
if (!dir)
return;
flags = BTRFS_I(dir)->flags;
if (flags & BTRFS_INODE_NOCOMPRESS) {
BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
} else if (flags & BTRFS_INODE_COMPRESS) {
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
}
if (flags & BTRFS_INODE_NODATACOW) {
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
if (S_ISREG(inode->i_mode))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
}
btrfs_sync_inode_flags_to_i_flags(inode);
}
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir,
const char *name, int name_len,
u64 ref_objectid, u64 objectid,
umode_t mode, u64 *index)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct inode *inode;
struct btrfs_inode_item *inode_item;
struct btrfs_key *location;
struct btrfs_path *path;
struct btrfs_inode_ref *ref;
struct btrfs_key key[2];
u32 sizes[2];
int nitems = name ? 2 : 1;
unsigned long ptr;
unsigned int nofs_flag;
int ret;
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
nofs_flag = memalloc_nofs_save();
inode = new_inode(fs_info->sb);
memalloc_nofs_restore(nofs_flag);
if (!inode) {
btrfs_free_path(path);
return ERR_PTR(-ENOMEM);
}
if (!name)
set_nlink(inode, 0);
inode->i_ino = objectid;
if (dir && name) {
trace_btrfs_inode_request(dir);
ret = btrfs_set_inode_index(BTRFS_I(dir), index);
if (ret) {
btrfs_free_path(path);
iput(inode);
return ERR_PTR(ret);
}
} else if (dir) {
*index = 0;
}
BTRFS_I(inode)->index_cnt = 2;
BTRFS_I(inode)->dir_index = *index;
BTRFS_I(inode)->root = btrfs_grab_root(root);
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
key[0].objectid = objectid;
key[0].type = BTRFS_INODE_ITEM_KEY;
key[0].offset = 0;
sizes[0] = sizeof(struct btrfs_inode_item);
if (name) {
key[1].objectid = objectid;
key[1].type = BTRFS_INODE_REF_KEY;
key[1].offset = ref_objectid;
sizes[1] = name_len + sizeof(*ref);
}
location = &BTRFS_I(inode)->location;
location->objectid = objectid;
location->offset = 0;
location->type = BTRFS_INODE_ITEM_KEY;
ret = btrfs_insert_inode_locked(inode);
if (ret < 0) {
iput(inode);
goto fail;
}
ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
if (ret != 0)
goto fail_unlock;
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = current_time(inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
sizeof(*inode_item));
fill_inode_item(trans, path->nodes[0], inode_item, inode);
if (name) {
ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
struct btrfs_inode_ref);
btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
ptr = (unsigned long)(ref + 1);
write_extent_buffer(path->nodes[0], name, ptr, name_len);
}
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
btrfs_inherit_iflags(inode, dir);
if (S_ISREG(mode)) {
if (btrfs_test_opt(fs_info, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
if (btrfs_test_opt(fs_info, NODATACOW))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
BTRFS_INODE_NODATASUM;
}
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
btrfs_update_root_times(trans, root);
ret = btrfs_inode_inherit_props(trans, inode, dir);
if (ret)
btrfs_err(fs_info,
"error inheriting props for ino %llu (root %llu): %d",
btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);
return inode;
fail_unlock:
discard_new_inode(inode);
fail:
if (dir && name)
BTRFS_I(dir)->index_cnt--;
btrfs_free_path(path);
return ERR_PTR(ret);
}
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index)
{
int ret = 0;
struct btrfs_key key;
struct btrfs_root *root = parent_inode->root;
u64 ino = btrfs_ino(inode);
u64 parent_ino = btrfs_ino(parent_inode);
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
memcpy(&key, &inode->root->root_key, sizeof(key));
} else {
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
}
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_add_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
index, name, name_len);
} else if (add_backref) {
ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
parent_ino, index);
}
if (ret)
return ret;
ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
btrfs_inode_type(&inode->vfs_inode), index);
if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
else if (ret) {
btrfs_abort_transaction(trans, ret);
return ret;
}
btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
name_len * 2);
inode_inc_iversion(&parent_inode->vfs_inode);
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
struct timespec64 now = current_time(&parent_inode->vfs_inode);
parent_inode->vfs_inode.i_mtime = now;
parent_inode->vfs_inode.i_ctime = now;
}
ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
btrfs_abort_transaction(trans, ret);
return ret;
fail_dir_item:
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
u64 local_index;
int err;
err = btrfs_del_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
&local_index, name, name_len);
if (err)
btrfs_abort_transaction(trans, err);
} else if (add_backref) {
u64 local_index;
int err;
err = btrfs_del_inode_ref(trans, root, name, name_len,
ino, parent_ino, &local_index);
if (err)
btrfs_abort_transaction(trans, err);
}
return ret;
}
static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, struct dentry *dentry,
struct btrfs_inode *inode, int backref, u64 index)
{
int err = btrfs_add_link(trans, dir, inode,
dentry->d_name.name, dentry->d_name.len,
backref, index);
if (err > 0)
err = -EEXIST;
return err;
}
static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
int err;
u64 objectid;
u64 index = 0;
trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans))
return PTR_ERR(trans);
err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
goto out_unlock;
}
inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, rdev);
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
goto out_unlock;
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
0, index);
if (err)
goto out_unlock;
btrfs_update_inode(trans, root, BTRFS_I(inode));
d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
if (err && inode) {
inode_dec_link_count(inode);
discard_new_inode(inode);
}
return err;
}
static int btrfs_create(struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
int err;
u64 objectid;
u64 index = 0;
trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans))
return PTR_ERR(trans);
err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
mode, &index);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
goto out_unlock;
}
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
inode->i_mapping->a_ops = &btrfs_aops;
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
goto out_unlock;
err = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (err)
goto out_unlock;
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
0, index);
if (err)
goto out_unlock;
d_instantiate_new(dentry, inode);
out_unlock:
btrfs_end_transaction(trans);
if (err && inode) {
inode_dec_link_count(inode);
discard_new_inode(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
}
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = d_inode(old_dentry);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 index;
int err;
int drop_inode = 0;
if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
return -EXDEV;
if (inode->i_nlink >= BTRFS_LINK_MAX)
return -EMLINK;
err = btrfs_set_inode_index(BTRFS_I(dir), &index);
if (err)
goto fail;
trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
trans = NULL;
goto fail;
}
BTRFS_I(inode)->dir_index = 0ULL;
inc_nlink(inode);
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ihold(inode);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
1, index);
if (err) {
drop_inode = 1;
} else {
struct dentry *parent = dentry->d_parent;
err = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (err)
goto fail;
if (inode->i_nlink == 1) {
err = btrfs_orphan_del(trans, BTRFS_I(inode));
if (err)
goto fail;
}
d_instantiate(dentry, inode);
btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
}
fail:
if (trans)
btrfs_end_transaction(trans);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
btrfs_btree_balance_dirty(fs_info);
return err;
}
static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct inode *inode = NULL;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
int err = 0;
u64 objectid = 0;
u64 index = 0;
trans = btrfs_start_transaction(