Skip to content

Commit 4506cfb

Browse files
Ryan Ding authored and torvalds committed
ocfs2: record UNWRITTEN extents when populate write desc
This supports direct io in ocfs2_write_begin_nolock & ocfs2_write_end_nolock. There is still one issue in the direct write procedure, which has three phases. Phase 1: allocate the extent with the UNWRITTEN flag. Phase 2: submit the direct data to disk and add zero pages to the page cache. Phase 3: clear the UNWRITTEN flag once the data has been written to disk. Suppose two direct writes, A (0~3KB) and B (4~7KB), target the same cluster 0~7KB (cluster size 8KB). Write request A arrives at phase 2 first and zeroes the region 4~7KB. Before request A enters phase 3, request B arrives at phase 2 and zeroes the region 0~3KB. In effect, request B steps on request A. To resolve this issue, request B must know that this cluster is already being zeroed, so that it does not step on the previous write request. This patch adds the function ocfs2_unwritten_check() to do this job. It records all clusters that are under direct write (they are recorded in the 'ip_unwritten_list' member of the inode info), and prevents a later direct write to the same cluster from doing the zeroing work again. Signed-off-by: Ryan Ding <[email protected]> Reviewed-by: Junxiao Bi <[email protected]> Cc: Joseph Qi <[email protected]> Cc: Mark Fasheh <[email protected]> Cc: Joel Becker <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 2de6a3c commit 4506cfb

File tree

4 files changed

+106
-5
lines changed

4 files changed

+106
-5
lines changed

fs/ocfs2/aops.c

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,6 +1201,13 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
12011201

12021202
#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
12031203

1204+
/*
* One cluster recorded by ocfs2_unwritten_check() on behalf of a direct
* write that will zero it. Each record is linked on two lists at once.
*/
struct ocfs2_unwritten_extent {
1205+
struct list_head ue_node; /* entry on ocfs2_write_ctxt.w_unwritten_list */
1206+
struct list_head ue_ip_node; /* entry on ocfs2_inode_info.ip_unwritten_list */
1207+
u32 ue_cpos; /* copied from the write descriptor's c_cpos */
1208+
u32 ue_phys; /* copied from the write descriptor's c_phys */
1209+
};
1210+
12041211
/*
12051212
* Describe the state of a single cluster to be written to.
12061213
*/
@@ -1275,6 +1282,8 @@ struct ocfs2_write_ctxt {
12751282
struct buffer_head *w_di_bh;
12761283

12771284
struct ocfs2_cached_dealloc_ctxt w_dealloc;
1285+
1286+
struct list_head w_unwritten_list;
12781287
};
12791288

12801289
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
@@ -1313,8 +1322,25 @@ static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
13131322
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
13141323
}
13151324

1316-
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
1325+
/*
* Free every ocfs2_unwritten_extent linked on @head through ue_node. Each
* entry is unlinked from @head, then unlinked through ue_ip_node while
* holding the inode's ip_lock, and finally kfree()d.
*/
static void ocfs2_free_unwritten_list(struct inode *inode,
1326+
struct list_head *head)
1327+
{
1328+
struct ocfs2_inode_info *oi = OCFS2_I(inode);
1329+
struct ocfs2_unwritten_extent *dz = NULL, *tmp = NULL;
1330+
1331+
list_for_each_entry_safe(dz, tmp, head, ue_node) { /* _safe: entries are deleted while iterating */
1332+
list_del(&dz->ue_node); /* unlink from @head; done without taking ip_lock */
1333+
spin_lock(&oi->ip_lock);
1334+
list_del(&dz->ue_ip_node); /* unlink via ue_ip_node under ip_lock */
1335+
spin_unlock(&oi->ip_lock);
1336+
kfree(dz);
1337+
}
1338+
}
1339+
1340+
static void ocfs2_free_write_ctxt(struct inode *inode,
1341+
struct ocfs2_write_ctxt *wc)
13171342
{
1343+
ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list);
13181344
ocfs2_unlock_pages(wc);
13191345
brelse(wc->w_di_bh);
13201346
kfree(wc);
@@ -1346,6 +1372,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
13461372
wc->w_large_pages = 0;
13471373

13481374
ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
1375+
INIT_LIST_HEAD(&wc->w_unwritten_list);
13491376

13501377
*wcp = wc;
13511378

@@ -1795,6 +1822,66 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
17951822
}
17961823
}
17971824

1825+
/*
1826+
* Check whether this cluster is already recorded on the inode's
1827+
* ip_unwritten_list. If so, skip the zeroing and do not clear UNWRITTEN: that
1828+
* is left to the direct io procedure. Otherwise, if this is a direct write
1829+
* that needs zeroing, record the cluster on ip_unwritten_list ourselves.
1830+
* Returns 0 on success, or -ENOMEM if the record cannot be allocated.
1831+
*/
static int ocfs2_unwritten_check(struct inode *inode,
1833+
struct ocfs2_write_ctxt *wc,
1834+
struct ocfs2_write_cluster_desc *desc)
1835+
{
1836+
struct ocfs2_inode_info *oi = OCFS2_I(inode);
1837+
struct ocfs2_unwritten_extent *dz = NULL, *new = NULL;
1838+
int ret = 0;
1839+
1840+
if (!desc->c_needs_zero) /* nothing to zero, so nothing to check or record */
1841+
return 0;
1842+
1843+
retry:
1844+
spin_lock(&oi->ip_lock);
1845+
/* No zeroing is needed, whether this write is buffered or direct: the
1846+
* write that recorded this cluster is doing the zeroing, and it will
1847+
* clear UNWRITTEN after all cluster io has finished. */
1848+
list_for_each_entry(dz, &oi->ip_unwritten_list, ue_ip_node) {
1849+
if (desc->c_cpos == dz->ue_cpos) {
1850+
BUG_ON(desc->c_new); /* an already-recorded cluster must not be flagged c_new */
1851+
desc->c_needs_zero = 0;
1852+
desc->c_clear_unwritten = 0;
1853+
goto unlock;
1854+
}
1855+
}
1856+
1857+
if (wc->w_type != OCFS2_WRITE_DIRECT) /* only direct writes record clusters */
1858+
goto unlock;
1859+
1860+
if (new == NULL) { /* first pass: allocate with ip_lock dropped, then rescan */
1861+
spin_unlock(&oi->ip_lock);
1862+
new = kmalloc(sizeof(struct ocfs2_unwritten_extent),
1863+
GFP_NOFS);
1864+
if (new == NULL) {
1865+
ret = -ENOMEM;
1866+
goto out;
1867+
}
1868+
goto retry;
1869+
}
1870+
/* This direct write will do the zeroing. */
1871+
new->ue_cpos = desc->c_cpos;
1872+
new->ue_phys = desc->c_phys;
1873+
desc->c_clear_unwritten = 0;
1874+
list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list);
1875+
list_add_tail(&new->ue_node, &wc->w_unwritten_list);
1876+
new = NULL; /* now linked on both lists; must not be freed below */
1877+
unlock:
1878+
spin_unlock(&oi->ip_lock);
1879+
out:
1880+
if (new) /* allocated but unused: a matching entry was found on the rescan */
1881+
kfree(new);
1882+
return ret;
1883+
}
1884+
17981885
/*
17991886
* Populate each single-cluster write descriptor in the write context
18001887
* with information about the i/o to be done.
@@ -1879,6 +1966,12 @@ static int ocfs2_populate_write_desc(struct inode *inode,
18791966
desc->c_needs_zero = 1;
18801967
}
18811968

1969+
ret = ocfs2_unwritten_check(inode, wc, desc);
1970+
if (ret) {
1971+
mlog_errno(ret);
1972+
goto out;
1973+
}
1974+
18821975
num_clusters--;
18831976
}
18841977

@@ -2215,9 +2308,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
22152308
* and non-sparse clusters we just extended. For non-sparse writes,
22162309
* we know zeros will only be needed in the first and/or last cluster.
22172310
*/
2218-
if (clusters_to_alloc || extents_to_split ||
2219-
(wc->w_clen && (wc->w_desc[0].c_needs_zero ||
2220-
wc->w_desc[wc->w_clen - 1].c_needs_zero)))
2311+
if (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
2312+
wc->w_desc[wc->w_clen - 1].c_needs_zero))
22212313
cluster_of_pages = 1;
22222314
else
22232315
cluster_of_pages = 0;
@@ -2296,7 +2388,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
22962388
ocfs2_commit_trans(osb, handle);
22972389

22982390
out:
2299-
ocfs2_free_write_ctxt(wc);
2391+
ocfs2_free_write_ctxt(inode, wc);
23002392

23012393
if (data_ac) {
23022394
ocfs2_free_alloc_context(data_ac);
@@ -2406,6 +2498,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
24062498
handle_t *handle = wc->w_handle;
24072499
struct page *tmppage;
24082500

2501+
BUG_ON(!list_empty(&wc->w_unwritten_list));
2502+
24092503
if (handle) {
24102504
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
24112505
wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE);

fs/ocfs2/inode.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,9 @@ static void ocfs2_clear_inode(struct inode *inode)
11701170
mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
11711171
"Clear inode of %llu, inode has io markers\n",
11721172
(unsigned long long)oi->ip_blkno);
1173+
mlog_bug_on_msg(!list_empty(&oi->ip_unwritten_list),
1174+
"Clear inode of %llu, inode has unwritten extents\n",
1175+
(unsigned long long)oi->ip_blkno);
11731176

11741177
ocfs2_extent_map_trunc(inode, 0);
11751178

fs/ocfs2/inode.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ struct ocfs2_inode_info
5757
u32 ip_flags; /* see below */
5858
u32 ip_attr; /* inode attributes */
5959

60+
/* Record unwritten extents during direct io. */
61+
struct list_head ip_unwritten_list;
62+
6063
/* protected by recovery_lock. */
6164
struct inode *ip_next_orphan;
6265

fs/ocfs2/super.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,6 +1745,7 @@ static void ocfs2_inode_init_once(void *data)
17451745
spin_lock_init(&oi->ip_lock);
17461746
ocfs2_extent_map_init(&oi->vfs_inode);
17471747
INIT_LIST_HEAD(&oi->ip_io_markers);
1748+
INIT_LIST_HEAD(&oi->ip_unwritten_list);
17481749
oi->ip_dir_start_lookup = 0;
17491750
mutex_init(&oi->ip_unaligned_aio);
17501751
init_rwsem(&oi->ip_alloc_sem);

0 commit comments

Comments
 (0)