patch-2.2.18 linux/fs/nfs/read.c

Next file: linux/fs/nfs/symlink.c
Previous file: linux/fs/nfs/proc.c
Back to the patch index
Back to the overall index

diff -u --new-file --recursive --exclude-from /usr/src/exclude v2.2.17/fs/nfs/read.c linux/fs/nfs/read.c
@@ -15,7 +15,8 @@
  * within the RPC code when root squashing is suspected.
  */
 
-#define NFS_NEED_XDR_TYPES
+#define NFS_NEED_NFS2_XDR_TYPES
+#include <linux/config.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -23,43 +24,62 @@
 #include <linux/stat.h>
 #include <linux/mm.h>
 #include <linux/malloc.h>
+#include <asm/pgtable.h>
 #include <linux/pagemap.h>
+#include <linux/file.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs2.h>
+#include <linux/nfs3.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/nfs_flushd.h>
 
 #include <asm/segment.h>
 #include <asm/system.h>
-#include <asm/pgtable.h>
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
-struct nfs_rreq {
-	struct inode *		ra_inode;	/* inode from which to read */
-	struct page *		ra_page;	/* page to be read */
-	struct nfs_readargs	ra_args;	/* XDR argument struct */
-	struct nfs_readres	ra_res;		/* ... and result struct */
-	struct nfs_fattr	ra_fattr;	/* fattr storage */
+struct nfs_read_data {
+	struct rpc_task		task;	/* RPC task embedded in this descriptor */
+	struct dentry		*dentry;	/* copied from the first request's wb_dentry */
+	struct rpc_cred		*cred;	/* copied from the first request's wb_cred */
+	struct nfs_readargs	args;	/* XDR argument struct */
+	struct nfs_readres	res;	/* ... and result struct */
+	struct nfs_fattr	fattr;	/* fattr storage */
+	struct list_head	pages;	/* Coalesced read requests */
 };
 
+/*
+ * Local function declarations
+ */
+static void	nfs_readpage_result(struct rpc_task *task);
+
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)
 #endif
 
+static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
+{
+	struct nfs_read_data	*data = kmalloc(sizeof(*data), GFP_KERNEL);
+
+	if (!data)
+		return NULL;	/* allocation failed; caller must check */
+	memset(data, 0, sizeof(*data));
+	INIT_LIST_HEAD(&data->pages);
+	return data;
+}
 
-/*
- * Set up the NFS read request struct
- */
-static inline void
-nfs_readreq_setup(struct nfs_rreq *req, struct nfs_fh *fh,
-		  unsigned long offset, void *buffer, unsigned int rsize)
+static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
 {
-	req->ra_args.fh     = fh;
-	req->ra_args.offset = offset;
-	req->ra_args.count  = rsize;
-	req->ra_args.buffer = buffer;
-	req->ra_res.fattr   = &req->ra_fattr;
-	req->ra_res.count   = rsize;
+	kfree(p);	/* counterpart of the kmalloc() in nfs_readdata_alloc() */
+}
+
+/* Release callback: free the nfs_read_data kept in task->tk_calldata. */
+static void nfs_readdata_release(struct rpc_task *task)
+{
+	nfs_readdata_free((struct nfs_read_data *) task->tk_calldata);
 }
 
 
@@ -67,32 +87,37 @@
  * Read a page synchronously.
  */
 static int
-nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page)
+nfs_readpage_sync(struct file *file, struct page *page)
 {
-	struct nfs_rreq	rqst;
-	unsigned long	offset = page->offset;
+	struct dentry	*dentry = file->f_dentry;
+	struct inode	*inode = dentry->d_inode;
+	struct rpc_cred	*cred = nfs_file_cred(file);
+	struct nfs_fattr fattr;
+	unsigned long	offset = nfs_page_offset(page);
 	char		*buffer = (char *) page_address(page);
 	int		rsize = NFS_SERVER(inode)->rsize;
 	int		result, refresh = 0;
-	int		count = PAGE_SIZE;
-	int		flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
+	int		count = PAGE_CACHE_SIZE, chunk, eof = 0;
+	int		flags = 0;
+
+	if (IS_SWAPFILE(inode))
+		flags |= NFS_RPC_SWAPFLAGS;
 
 	dprintk("NFS: nfs_readpage_sync(%p)\n", page);
 	clear_bit(PG_error, &page->flags);
 
 	do {
-		if (count < rsize)
-			rsize = count;
+		if ((chunk = rsize) > count)
+			chunk = count;
 
 		dprintk("NFS: nfs_proc_read(%s, (%s/%s), %ld, %d, %p)\n",
 			NFS_SERVER(inode)->hostname,
 			dentry->d_parent->d_name.name, dentry->d_name.name,
-			offset, rsize, buffer);
+			offset, chunk, buffer);
 
-		/* Set up arguments and perform rpc call */
-		nfs_readreq_setup(&rqst, NFS_FH(dentry), offset, buffer, rsize);
-		result = rpc_call(NFS_CLIENT(inode), NFSPROC_READ,
-					&rqst.ra_args, &rqst.ra_res, flags);
+		result = NFS_CALL(read, inode, (dentry, &fattr, cred, flags,
+						offset, chunk, buffer, &eof));
+		nfs_refresh_inode(inode, &fattr);
 
 		/*
 		 * Even if we had a partial success we can't mark the page
@@ -107,7 +132,7 @@
 		count  -= result;
 		offset += result;
 		buffer += result;
-		if (result < rsize)	/* NFSv2ism */
+		if (eof)
 			break;
 	} while (count);
 
@@ -118,97 +143,311 @@
 
 io_error:
 	/* Note: we don't refresh if the call returned error */
-	if (refresh && result >= 0)
-		nfs_refresh_inode(inode, &rqst.ra_fattr);
-	/* N.B. Use nfs_unlock_page here? */
-	clear_bit(PG_locked, &page->flags);
-	wake_up(&page->wait);
+	if (result < 0)
+		set_bit(PG_error, &page->flags);
 	return result;
 }
 
+/* Return the queued read request whose page index matches 'page', or NULL. */
+static inline struct nfs_page *
+_nfs_find_read(struct inode *inode, struct page *page)
+{
+	struct list_head	*queue = &inode->u.nfs_i.read;
+	struct list_head	*pos;
+
+	for (pos = queue->next; pos != queue; pos = pos->next) {
+		struct nfs_page *found = nfs_list_entry(pos);
+
+		if (page_index(found->wb_page) == page_index(page)) {
+			found->wb_count++;	/* bump wb_count for the caller */
+			return found;
+		}
+	}
+	return NULL;
+}
+
+/* Look up a queued read request for 'page'; see _nfs_find_read(). */
+static struct nfs_page *
+nfs_find_read(struct inode *inode, struct page *page)
+{
+	/* Plain pass-through: no extra locking or checks are added here. */
+	return _nfs_find_read(inode, page);
+}
+
 /*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
+ * Add a request to the inode's asynchronous read list.
  */
-static void
-nfs_readpage_result(struct rpc_task *task)
+static inline void
+nfs_mark_request_read(struct nfs_page *req)
 {
-	struct nfs_rreq	*req = (struct nfs_rreq *) task->tk_calldata;
-	struct page	*page = req->ra_page;
-	unsigned long	address = page_address(page);
-	int		result = task->tk_status;
-	static int	succ = 0, fail = 0;
-
-	dprintk("NFS: %4d received callback for page %lx, result %d\n",
-			task->tk_pid, address, result);
-
-	if (result >= 0) {
-		result = req->ra_res.count;
-		if (result < PAGE_SIZE) {
-			memset((char *) address + result, 0, PAGE_SIZE - result);
+	struct inode *inode = req->wb_dentry->d_inode;
+	/* Put 'req' on the inode's read list (once) and schedule a scan. */
+	if (list_empty(&req->wb_list)) {	/* not linked on any list yet */
+		nfs_list_add_request(req, &inode->u.nfs_i.read);
+		inode->u.nfs_i.nread++;		/* counter tracks the list length */
+	}
+	inode_schedule_scan(inode, req->wb_timeout);	/* NOTE(review): presumably a scan due at wb_timeout - confirm */
+}
+
+static int
+nfs_readpage_async(struct file *file, struct page *page)
+{
+	struct inode	*inode = file->f_dentry->d_inode;
+	struct nfs_page	*req, *new = NULL;
+	int		result;
+	/* Queue a read request for 'page' unless it is uptodate or already queued. */
+	for (;;) {
+		result = 0;
+		if (PageUptodate(page))	/* nothing left to read */
+			break;	/* NOTE(review): returns 0 with nothing queued - confirm who unlocks the page */
+
+		req = nfs_find_read(inode, page);
+		if (req) {
+			if (page != req->wb_page) {	/* same index, different page */
+				nfs_release_request(req);
+				nfs_pagein_inode(inode, page_index(page), 0);	/* presumably sends the queued one; then retry */
+				continue;
+			}
+			nfs_release_request(req);	/* a request for this very page is already queued */
+			break;
 		}
-		nfs_refresh_inode(req->ra_inode, &req->ra_fattr);
-		flush_dcache_page(address);
-		set_bit(PG_uptodate, &page->flags);
-		succ++;
-	} else {
-		set_bit(PG_error, &page->flags);
-		fail++;
-		dprintk("NFS: %d successful reads, %d failures\n", succ, fail);
+
+		if (new) {	/* second pass: nothing found, so queue the one we created */
+			nfs_lock_request(new);
+			new->wb_timeout = jiffies + NFS_READ_DELAY;
+			nfs_mark_request_read(new);
+			nfs_unlock_request(new);
+			new = NULL;	/* keeps the release below from touching it */
+			break;
+		}
+		/* First pass: create the request, then loop to re-check the list. */
+		result = -ENOMEM;
+		new = nfs_create_request(file, page, 0, PAGE_CACHE_SIZE);
+		if (!new)
+			break;
 	}
-	/* N.B. Use nfs_unlock_page here? */
-	clear_bit(PG_locked, &page->flags);
-	wake_up(&page->wait);
 
-	free_page(address);
+	if (inode->u.nfs_i.nread >= NFS_SERVER(inode)->rpages ||
+	    page_index(page) == (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
+		nfs_pagein_inode(inode, 0, 0);	/* rpages requests pending, or page index derived from i_size hit */
+	if (new)	/* created but never queued */
+		nfs_release_request(new);
+	return result;
+}
 
-	rpc_release_task(task);
-	kfree(req);
+/*
+ * Set up the NFS read request struct: move every request from 'head' onto
+ * data->pages, give each one an iovec slot, and fill in the READ arguments.
+ * The first request on data->pages supplies the dentry, cred, fh and offset.
+ */
+static void
+nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data)
+{
+	struct nfs_page		*req;
+	struct iovec		*iov = data->args.iov;
+	unsigned int		count = 0;
+
+	while (!list_empty(head)) {
+		req = nfs_list_entry(head->next);	/* reuse the outer 'req'; no shadowing */
+		nfs_list_remove_request(req);
+		nfs_list_add_request(req, &data->pages);
+		iov->iov_base = (void *)(page_address(req->wb_page) + req->wb_offset);
+		iov->iov_len = req->wb_bytes;
+		count += req->wb_bytes;
+		iov++;
+		data->args.nriov++;
+	}
+	req = nfs_list_entry(data->pages.next);
+	data->dentry	  = req->wb_dentry;
+	data->cred	  = req->wb_cred;
+	data->args.fh     = NFS_FH(req->wb_dentry);
+	data->args.offset = nfs_page_offset(req->wb_page) + req->wb_offset;
+	data->args.count  = count;
+	data->res.fattr   = &data->fattr;
+	data->res.count   = count;
+	data->res.eof     = 0;
 }
 
-static inline int
-nfs_readpage_async(struct dentry *dentry, struct inode *inode,
-			struct page *page)
+static void
+nfs_async_read_error(struct list_head *head)
 {
-	unsigned long address = page_address(page);
-	struct nfs_rreq	*req;
-	int		result = -1, flags;
+	struct nfs_page	*req;
+	struct page	*page;
 
-	dprintk("NFS: nfs_readpage_async(%p)\n", page);
-	if (NFS_CONGESTED(inode))
-		goto out_defer;
+	while (!list_empty(head)) {	/* fail every request still on 'head' */
+		req = nfs_list_entry(head->next);
+		page = req->wb_page;
+		nfs_list_remove_request(req);
+		set_bit(PG_error, &page->flags);	/* mark the page as failed */
+		nfs_unlock_page(page);
+		nfs_unlock_request(req);
+		nfs_release_request(req);
+	}
+}
+
+/* Start one asynchronous READ RPC covering all requests on 'head'; returns 0 or -ENOMEM. */
+static int nfs_pagein_one(struct list_head *head, struct dentry *dentry)
+{
+	struct inode		*inode = dentry->d_inode;
+	struct rpc_task		*task;
+	struct rpc_clnt		*clnt = NFS_CLIENT(inode);
+	struct nfs_read_data	*data;
+	struct rpc_message	msg;
+	int			flags;
+	sigset_t		oldset;
+
+	data = nfs_readdata_alloc();
+	if (!data)
+		goto out_bad;	/* fails every request on 'head' */
+	task = &data->task;
 
 	/* N.B. Do we need to test? Never called for swapfile inode */
 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-	req = (struct nfs_rreq *) rpc_allocate(flags, sizeof(*req));
-	if (!req)
-		goto out_defer;
-
-	/* Initialize request */
-	/* N.B. Will the dentry remain valid for life of request? */
-	nfs_readreq_setup(req, NFS_FH(dentry), page->offset,
-				(void *) address, PAGE_SIZE);
-	req->ra_inode = inode;
-	req->ra_page = page; /* count has been incremented by caller */
+
+	nfs_read_rpcsetup(head, data);
+
+	/* Finalize the task. */
+	rpc_init_task(task, clnt, nfs_readpage_result, flags);
+	task->tk_calldata = data;
+	/* Release hook: nfs_readdata_release() frees 'data' */
+	task->tk_release = nfs_readdata_release;
+
+#ifdef CONFIG_NFS_V3
+	msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_READ : NFSPROC_READ;
+#else
+	msg.rpc_proc = NFSPROC_READ;
+#endif
+	msg.rpc_argp = &data->args;
+	msg.rpc_resp = &data->res;
+	msg.rpc_cred = data->cred;
 
 	/* Start the async call */
-	dprintk("NFS: executing async READ request.\n");
-	result = rpc_do_call(NFS_CLIENT(inode), NFSPROC_READ,
-				&req->ra_args, &req->ra_res, flags,
-				nfs_readpage_result, req);
-	if (result < 0)
-		goto out_free;
-	result = 0;
-out:
-	return result;
+	dprintk("NFS: %4d initiated read call (req %s/%s count %d nriov %d)\n",
+		task->tk_pid,
+		dentry->d_parent->d_name.name, dentry->d_name.name,
+		data->args.count, data->args.nriov);
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	rpc_call_setup(task, &msg, 0);
+	rpc_execute(task);
+	rpc_clnt_sigunmask(clnt, &oldset);
+	return 0;
+out_bad:
+	nfs_async_read_error(head);
+	return -ENOMEM;
+}
+
+/* Send every request on 'head' in coalesced batches; batch total or <0. */
+static int nfs_pagein_list(struct inode *inode, struct list_head *head)
+{
+	LIST_HEAD(batch);
+	struct nfs_page		*first;
+	unsigned int		max_pages = NFS_SERVER(inode)->rpages;
+	unsigned int		total = 0;
+	int			status;
+
+	while (!list_empty(head)) {
+		total += nfs_coalesce_requests(head, &batch, max_pages);
+		first = nfs_list_entry(batch.next);
+		status = nfs_pagein_one(&batch, first->wb_dentry);
+		if (status < 0) {
+			/* Fail whatever is still queued on 'head'. */
+			nfs_async_read_error(head);
+			return status;
+		}
+	}
+
+	return total;
+}
 
-out_defer:
-	dprintk("NFS: deferring async READ request.\n");
-	goto out;
-out_free:
-	dprintk("NFS: failed to enqueue async READ request.\n");
-	kfree(req);
-	goto out;
+/* Run nfs_scan_list_timeout() on the read list and deduct its result from nread. */
+static int nfs_scan_read_timeout(struct inode *inode, struct list_head *dst)
+{
+	int	moved = nfs_scan_list_timeout(&inode->u.nfs_i.read, dst, inode);
+
+	inode->u.nfs_i.nread -= moved;
+	if (list_empty(&inode->u.nfs_i.read) != (inode->u.nfs_i.nread == 0))
+		printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n");
+	return moved;
+}
+
+/* Run nfs_scan_list() over the read list and deduct its result from nread. */
+static int nfs_scan_read(struct inode *inode, struct list_head *dst,
+			 unsigned long idx_start, unsigned int npages)
+{
+	int	moved = nfs_scan_list(&inode->u.nfs_i.read, dst, NULL, idx_start, npages);
+	inode->u.nfs_i.nread -= moved;
+	if (list_empty(&inode->u.nfs_i.read) != (inode->u.nfs_i.nread == 0))
+		printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n");
+	return moved;
+}
+
+/* Issue the reads picked by nfs_scan_read(idx_start, npages); count or <0. */
+int nfs_pagein_inode(struct inode *inode, unsigned long idx_start,
+		     unsigned int npages)
+{
+	LIST_HEAD(batch);
+	int	status;
+	int	found = nfs_scan_read(inode, &batch, idx_start, npages);
+
+	if (!found)
+		return 0;
+
+	status = nfs_pagein_list(inode, &batch);
+	return (status < 0) ? status : found;
+}
+
+/* Issue the reads picked by nfs_scan_read_timeout(); returns count or <0. */
+int nfs_pagein_timeout(struct inode *inode)
+{
+	LIST_HEAD(batch);
+	int	status;
+	int	found = nfs_scan_read_timeout(inode, &batch);
+
+	if (!found)
+		return 0;
+
+	status = nfs_pagein_list(inode, &batch);
+	return (status < 0) ? status : found;
+}
+
+/*
+ * RPC callback: a reply was received or an error occurred (timeout or
+ * socket shutdown). Flags every page on data->pages uptodate or in error.
+ */
+static void
+nfs_readpage_result(struct rpc_task *task)
+{
+	struct nfs_read_data	*data = (struct nfs_read_data *) task->tk_calldata;
+	struct dentry		*dentry = data->dentry;
+	struct inode		*inode = dentry->d_inode;
+	int			count = data->res.count;	/* presumably the byte count from the reply - TODO confirm */
+
+	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
+		task->tk_pid, task->tk_status);
+
+	nfs_refresh_inode(inode, &data->fattr);	/* called regardless of tk_status */
+	while (!list_empty(&data->pages)) {
+		struct nfs_page *req = nfs_list_entry(data->pages.next);
+		struct page *page = req->wb_page;
+		nfs_list_remove_request(req);
+
+		if (task->tk_status >= 0 && count >= 0) {	/* NOTE(review): still true once count is exactly 0 */
+			flush_dcache_page(page_address(page)); /* Is this correct? */
+			set_bit(PG_uptodate, &page->flags);
+			count -= PAGE_CACHE_SIZE;	/* one page's worth accounted for */
+		} else
+			set_bit(PG_error, &page->flags);
+		nfs_unlock_page(page);
+
+		dprintk("NFS: read (%s/%s %d@%ld)\n",
+                        req->wb_dentry->d_parent->d_name.name,
+                        req->wb_dentry->d_name.name,
+                        req->wb_bytes,
+                        (nfs_page_offset(page) + req->wb_offset));
+		nfs_unlock_request(req);
+		nfs_release_request(req);
+	}
 }
 
 /*
@@ -216,7 +455,7 @@
  * We read the page synchronously in the following cases:
  *  -	The file is a swap file. Swap-ins are always sync operations,
  *	so there's no need bothering to make async reads 100% fail-safe.
- *  -	The NFS rsize is smaller than PAGE_SIZE. We could kludge our way
+ *  -	The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way
  *	around this by creating several consecutive read requests, but
  *	that's hardly worth it.
  *  -	The error flag is set for this page. This happens only when a
@@ -228,41 +467,35 @@
 {
 	struct dentry *dentry = file->f_dentry;
 	struct inode *inode = dentry->d_inode;
-	int		error;
+	int		error = 0,
+			rsize = NFS_SERVER(inode)->rsize;
+
+	while (!nfs_lock_page(page))
+		wait_on_page(page);
 
-	dprintk("NFS: nfs_readpage (%p %ld@%ld)\n",
-		page, PAGE_SIZE, page->offset);
-	atomic_inc(&page->count);
-	set_bit(PG_locked, &page->flags);
+	dprintk("NFS: nfs_readpage (%p %d@%ld)\n",
+		page, rsize, page->offset);
 
 	/*
-	 * Try to flush any pending writes to the file..
-	 *
-	 * NOTE! Because we own the page lock, there cannot
-	 * be any new pending writes generated at this point
-	 * for this page (other pages can be written to).
+	 * Try to flush any pending writes to the file
 	 */
 	error = nfs_wb_page(inode, page);
-	if (error)
-		goto out_error;
+	if (error < 0)
+		goto out_unlock;
 
 	error = -1;
-	if (!IS_SWAPFILE(inode) && !PageError(page) &&
-	    NFS_SERVER(inode)->rsize >= PAGE_SIZE)
-		error = nfs_readpage_async(dentry, inode, page);
+	if (!IS_SWAPFILE(inode) && !PageError(page) && rsize >= PAGE_CACHE_SIZE)
+		error = nfs_readpage_async(file, page);
+
 	if (error >= 0)
 		goto out;
 
-	error = nfs_readpage_sync(dentry, inode, page);
+	error = nfs_readpage_sync(file, page);
 	if (error < 0 && IS_SWAPFILE(inode))
-		printk("Aiee.. nfs swap-in of page failed!\n");
-	goto out_free;
+		printk(KERN_ERR "Aiee.. nfs swap-in of page failed!\n");
 
-out_error:
-	clear_bit(PG_locked, &page->flags);
-	wake_up(&page->wait);
-out_free:
-	free_page(page_address(page));
-out:
+ out_unlock:
+	nfs_unlock_page(page);
+ out:
 	return error;
 }

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)