/*	$NetBSD: nfs_bio.c,v 1.191 2015/07/15 03:28:55 manu Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.191 2015/07/15 03:28:55 manu Exp $");

#ifdef _KERNEL_OPT
#include "opt_nfs.h"
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/dirent.h>
#include <sys/kauth.h>
#include <sys/kmem.h>	/* kmem_alloc/kmem_free are used below */

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern int nfs_commitsize;
extern struct nfsstats nfsstats;

static int nfs_doio_read(struct buf *, struct uio *);
static int nfs_doio_write(struct buf *, struct uio *);
static int nfs_doio_phys(struct buf *, struct uio *);

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag,
    kauth_cred_t cred, int cflag)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp = NULL, *rabp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsdircache *ndp = NULL, *nndp = NULL;
	void *baddr;
	int got_buf = 0, error = 0, n = 0, on = 0, en, enn;
	int enough = 0;
	struct dirent *dp, *pdp, *edp, *ep;
	off_t curoff = 0;
	int advice;
	struct lwp *l = curlwp;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (vp->v_type != VDIR && uio->uio_offset < 0)
		return (EINVAL);
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, l);
#endif
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);

	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 *
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * nfs_attrtimeo seconds out of date.  If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */

	if (vp->v_type != VLNK) {
		error = nfs_flushstalebuf(vp, cred, l,
		    NFS_FLUSHSTALEBUF_MYWRITE);
		if (error)
			return error;
	}

	do {
		/*
		 * Don't cache symlinks that are the root of the mount;
		 * read them from the server each time.
		 */
		if ((vp->v_vflag & VV_ROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (void *)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;

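			/*
			 * Regular-file reads are served straight from the
			 * page cache via ubc_uiomove(); each pass below
			 * copies at most the remainder of the file, and
			 * nfs_delayedtruncate() applies any truncation
			 * that had to be deferred earlier.
			 */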
			advice = IO_ADV_DECODE(ioflag);
			error = 0;
			while (uio->uio_resid > 0) {
				vsize_t bytelen;

				nfs_delayedtruncate(vp);
				if (np->n_size <= uio->uio_offset) {
					break;
				}
				bytelen =
				    MIN(np->n_size - uio->uio_offset, uio->uio_resid);
				error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
				    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
				if (error) {
					/*
					 * XXXkludge
					 * the file has been truncated on the server.
					 * there isn't much we can do.
					 */
					if (uio->uio_offset >= np->n_size) {
						/* end of file */
						error = 0;
					} else {
						break;
					}
				}
			}
			break;

		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, MAXPATHLEN, l);
			if (!bp)
				return (EINTR);
			if ((bp->b_oflags & BO_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
			}
			n = MIN(uio->uio_resid, MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
diragain:
			nfsstats.biocache_readdirs++;
			ndp = nfs_searchdircache(vp, uio->uio_offset,
			    (nmp->nm_flag & NFSMNT_XLATECOOKIE), 0);
			if (!ndp) {
				/*
				 * We've been handed a cookie that is not
				 * in the cache. If we're not translating
				 * 32 <-> 64, it may be a value that was
				 * flushed out of the cache because it grew
				 * too big. Let the server judge if it's
				 * valid or not. In the translation case,
				 * we have no way of validating this value,
				 * so punt.
				 */
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
					return (EINVAL);
				ndp = nfs_enterdircache(vp, uio->uio_offset,
				    uio->uio_offset, 0, 0);
			}

			if (NFS_EOFVALID(np) &&
			    ndp->dc_cookie == np->n_direofoffset) {
				nfs_putdircache(np, ndp);
				nfsstats.direofcache_hits++;
				return (0);
			}

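			/*
			 * Directory data is cached in NFS_DIRBLKSIZ blocks;
			 * NFSDC_BLKNO(ndp) names the cache block, and
			 * dc_blkcookie is the server cookie at which the
			 * READDIR for that block starts.
			 */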
			bp = nfs_getcacheblk(vp, NFSDC_BLKNO(ndp), NFS_DIRBLKSIZ, l);
			if (!bp)
				return (EINTR);
			if ((bp->b_oflags & BO_DONE) == 0) {
				bp->b_flags |= B_READ;
				bp->b_dcookie = ndp->dc_blkcookie;
				error = nfs_doio(bp);
				if (error) {
					/*
					 * Yuck! The directory has been modified on the
					 * server. Punt and let the userland code
					 * deal with it.
					 */
					nfs_putdircache(np, ndp);
					brelse(bp, 0);
					/*
					 * nfs_request maps NFSERR_BAD_COOKIE to EINVAL.
					 */
					if (error == EINVAL) { /* NFSERR_BAD_COOKIE */
						nfs_invaldircache(vp, 0);
						nfs_vinvalbuf(vp, 0, cred, l, 1);
					}
					return (error);
				}
			}

			/*
			 * Just return if we hit EOF right away with this
			 * block. Always check here, because direofoffset
			 * may have been set by an nfsiod since the last
			 * check.
			 *
			 * also, empty block implies EOF.
			 */

			if (bp->b_bcount == bp->b_resid ||
			    (NFS_EOFVALID(np) &&
			    ndp->dc_blkcookie == np->n_direofoffset)) {
				KASSERT(bp->b_bcount != bp->b_resid ||
				    ndp->dc_blkcookie == bp->b_dcookie);
				nfs_putdircache(np, ndp);
				brelse(bp, BC_NOCACHE);
				return 0;
			}

			/*
			 * Find the entry we were looking for in the block.
			 */

			en = ndp->dc_entry;

			pdp = dp = (struct dirent *)bp->b_data;
			edp = (struct dirent *)(void *)((char *)bp->b_data +
			    bp->b_bcount - bp->b_resid);
			enn = 0;
			while (enn < en && dp < edp) {
				pdp = dp;
				dp = _DIRENT_NEXT(dp);
				enn++;
			}

			/*
			 * If the entry number was bigger than the number of
			 * entries in the block, or the cookie of the previous
			 * entry doesn't match, the directory cache is
			 * stale. Flush it and try again (i.e. go to
			 * the server).
			 */
			if (dp >= edp || (struct dirent *)_DIRENT_NEXT(dp) > edp ||
			    (en > 0 && NFS_GETCOOKIE(pdp) != ndp->dc_cookie)) {
#ifdef DEBUG
				printf("invalid cache: %p %p %p off %jx %jx\n",
				    pdp, dp, edp,
				    (uintmax_t)uio->uio_offset,
				    (uintmax_t)NFS_GETCOOKIE(pdp));
#endif
				nfs_putdircache(np, ndp);
				brelse(bp, 0);
				nfs_invaldircache(vp, 0);
				nfs_vinvalbuf(vp, 0, cred, l, 0);
				goto diragain;
			}

			on = (char *)dp - (char *)bp->b_data;

			/*
			 * Cache all entries that may be exported to the
			 * user, as they may be thrown back at us. The
			 * NFSBIO_CACHECOOKIES flag indicates that all
			 * entries are being 'exported', so cache them all.
			 */

			if (en == 0 && pdp == dp) {
				dp = _DIRENT_NEXT(dp);
				enn++;
			}

			if (uio->uio_resid < (bp->b_bcount - bp->b_resid - on)) {
				n = uio->uio_resid;
				enough = 1;
			} else
				n = bp->b_bcount - bp->b_resid - on;

			ep = (struct dirent *)(void *)((char *)bp->b_data + on + n);

			/*
			 * Find last complete entry to copy, caching entries
			 * (if requested) as we go.
			 */

			while (dp < ep && (struct dirent *)_DIRENT_NEXT(dp) <= ep) {
				if (cflag & NFSBIO_CACHECOOKIES) {
					nndp = nfs_enterdircache(vp, NFS_GETCOOKIE(pdp),
					    ndp->dc_blkcookie, enn, bp->b_lblkno);
					if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
						NFS_STASHCOOKIE32(pdp,
						    nndp->dc_cookie32);
					}
					nfs_putdircache(np, nndp);
				}
				pdp = dp;
				dp = _DIRENT_NEXT(dp);
				enn++;
			}
			nfs_putdircache(np, ndp);

			/*
			 * If the last requested entry was not the last in the
			 * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),
			 * cache the cookie of the last requested one, and
			 * set the offset to it.
			 */

			if ((on + n) < bp->b_bcount - bp->b_resid) {
				curoff = NFS_GETCOOKIE(pdp);
				nndp = nfs_enterdircache(vp, curoff, ndp->dc_blkcookie,
				    enn, bp->b_lblkno);
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
					NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
				nfs_putdircache(np, nndp);
			} else
				curoff = bp->b_dcookie;

			/*
			 * Always cache the entry for the next block,
			 * so that readaheads can use it.
			 */
			nndp = nfs_enterdircache(vp, bp->b_dcookie, bp->b_dcookie, 0, 0);
			if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
				if (curoff == bp->b_dcookie) {
					NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
			}

			n = (char *)_DIRENT_NEXT(pdp) - ((char *)bp->b_data + on);

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have the
			 * directory offset cookie of the next block.)
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    !NFS_EOFVALID(np)) {
				rabp = nfs_getcacheblk(vp, NFSDC_BLKNO(nndp),
				    NFS_DIRBLKSIZ, l);
				if (rabp) {
					if ((rabp->b_oflags & (BO_DONE | BO_DELWRI)) == 0) {
						rabp->b_dcookie = nndp->dc_cookie;
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp)) {
							brelse(rabp, BC_INVAL);
						}
					} else
						brelse(rabp, 0);
				}
			}
			nfs_putdircache(np, nndp);
			got_buf = 1;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove((char *)baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			uio->uio_offset = curoff;
			if (enough)
				n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp, 0);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	kauth_cred_t cred = ap->a_cred;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	voff_t oldoff, origoff;
	vsize_t bytelen;
	int error = 0;
	int ioflag = ap->a_ioflag;
	int extended = 0, wrotedata = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, l);
#endif
	if (ioflag & IO_APPEND) {
		NFS_INVALIDATE_ATTRCACHE(np);
		error = nfs_flushstalebuf(vp, cred, l,
		    NFS_FLUSHSTALEBUF_MYWRITE);
		if (error)
			return (error);
		uio->uio_offset = np->n_size;

		/*
		 * This is already checked above VOP_WRITE, but recheck
		 * the append case here to make sure our idea of the
		 * file size is as fresh as possible.
		 */
		if (uio->uio_offset + uio->uio_resid >
		    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
			mutex_enter(proc_lock);
			psignal(l->l_proc, SIGXFSZ);
			mutex_exit(proc_lock);
			return (EFBIG);
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);

	origoff = uio->uio_offset;
	do {
		bool overwrite;	/* if we are overwriting whole pages */
		u_quad_t oldsize;
		oldoff = uio->uio_offset;
		bytelen = uio->uio_resid;

		nfsstats.biocache_writes++;

		oldsize = np->n_size;
		np->n_flag |= NMODIFIED;
		if (np->n_size < uio->uio_offset + bytelen) {
			np->n_size = uio->uio_offset + bytelen;
		}
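		/*
		 * Decide whether this chunk overwrites whole pages.
		 * If so, UBC_FAULTBUSY below lets UBC provide the pages
		 * without first reading their old contents from the
		 * server, since every byte of them will be rewritten.
		 */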
		overwrite = false;
		if ((uio->uio_offset & PAGE_MASK) == 0) {
			if ((vp->v_vflag & VV_MAPPED) == 0 &&
			    bytelen > PAGE_SIZE) {
				bytelen = trunc_page(bytelen);
				overwrite = true;
			} else if ((bytelen & PAGE_MASK) == 0 &&
			    uio->uio_offset >= vp->v_size) {
				overwrite = true;
			}
		}
		if (vp->v_size < uio->uio_offset + bytelen) {
			uvm_vnp_setwritesize(vp, uio->uio_offset + bytelen);
		}
		error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
		    UVM_ADV_RANDOM, UBC_WRITE | UBC_PARTIALOK |
		    (overwrite ? UBC_FAULTBUSY : 0) |
		    UBC_UNMAP_FLAG(vp));
		if (error) {
			uvm_vnp_setwritesize(vp, vp->v_size);
			if (overwrite && np->n_size != oldsize) {
				/*
				 * backout size and free pages past eof.
				 */
				np->n_size = oldsize;
				mutex_enter(vp->v_interlock);
				(void)VOP_PUTPAGES(vp, round_page(vp->v_size),
				    0, PGO_SYNCIO | PGO_FREE);
			}
			break;
		}
		wrotedata = 1;

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 */

		if (vp->v_size < uio->uio_offset) {
			uvm_vnp_setsize(vp, uio->uio_offset);
			extended = 1;
		}

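		/*
		 * If this pass crossed into a new nm_wsize-aligned window,
		 * start cleaning the previous window's pages, so that dirty
		 * data is pushed to the server in server-sized chunks
		 * instead of accumulating until the file is flushed.
		 */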
		if ((oldoff & ~(nmp->nm_wsize - 1)) !=
		    (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
			mutex_enter(vp->v_interlock);
			error = VOP_PUTPAGES(vp,
			    trunc_page(oldoff & ~(nmp->nm_wsize - 1)),
			    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
			    ~(nmp->nm_wsize - 1)), PGO_CLEANIT);
		}
	} while (uio->uio_resid > 0);
	if (wrotedata)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error == 0 && (ioflag & IO_SYNC) != 0) {
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp,
		    trunc_page(origoff & ~(nmp->nm_wsize - 1)),
		    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
		    ~(nmp->nm_wsize - 1)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	return error;
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct lwp *l)
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

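	/*
	 * On interruptible mounts, poll getblk() with a timeout so that
	 * a pending signal can abort the wait for a busy buffer.
	 */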
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, l))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred,
    struct lwp *l, int intrflg)
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, allerror = 0, slptimeo;
	bool catch_p;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		catch_p = true;
		slptimeo = 2 * hz;
	} else {
		catch_p = false;
		if (nmp->nm_flag & NFSMNT_SOFT)
			slptimeo = nmp->nm_retry * nmp->nm_timeo;
		else
			slptimeo = 0;
	}
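	/*
	 * On soft mounts the sleeps below are bounded by the mount's
	 * retransmit parameters, so they wake up periodically instead
	 * of blocking forever on a dead server.
	 */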
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	mutex_enter(vp->v_interlock);
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = mtsleep(&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo, vp->v_interlock);
		if (error && intrflg && nfs_sigintr(nmp, NULL, l)) {
			mutex_exit(vp->v_interlock);
			return EINTR;
		}
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	mutex_exit(vp->v_interlock);
	error = vinvalbuf(vp, flags, cred, l, catch_p, 0);
	while (error) {
		if (allerror == 0)
			allerror = error;
		if (intrflg && nfs_sigintr(nmp, NULL, l)) {
			error = EINTR;
			break;
		}
		error = vinvalbuf(vp, flags, cred, l, 0, slptimeo);
	}
	mutex_enter(vp->v_interlock);
	if (allerror != 0) {
		/*
		 * Keep error from vinvalbuf so fsync/close will know.
		 */
		np->n_error = allerror;
		np->n_flag |= NWRITEERR;
	}
	if (error == 0)
		np->n_flag &= ~NMODIFIED;
	np->n_flag &= ~NFLUSHINPROG;
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	mutex_exit(vp->v_interlock);
	return error;
}

/*
 * nfs_flushstalebuf: flush cache if it's stale.
 *
 * => caller shouldn't own any pages or buffers which belong to the vnode.
 */

int
nfs_flushstalebuf(struct vnode *vp, kauth_cred_t cred, struct lwp *l,
    int flags)
{
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;

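	/*
	 * Two cases: if we have modified the file locally, flush our
	 * dirty data (unless NFS_FLUSHSTALEBUF_MYWRITE says the writes
	 * are our own) and refetch the attributes.  Otherwise compare
	 * the server's mtime with the cached one and invalidate the
	 * cache if the file changed behind our back.
	 */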
	if (np->n_flag & NMODIFIED) {
		if ((flags & NFS_FLUSHSTALEBUF_MYWRITE) == 0
		    || vp->v_type != VREG) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
			if (error)
				return error;
			if (vp->v_type == VDIR) {
				nfs_invaldircache(vp, 0);
			}
		} else {
			/*
			 * XXX assuming writes are ours.
			 */
		}
		NFS_INVALIDATE_ATTRCACHE(np);
		error = VOP_GETATTR(vp, &vattr, cred);
		if (error)
			return error;
		np->n_mtime = vattr.va_mtime;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred);
		if (error)
			return error;
		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
			if (vp->v_type == VDIR) {
				nfs_invaldircache(vp, 0);
			}
			error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
			if (error)
				return error;
			np->n_mtime = vattr.va_mtime;
		}
	}

	return error;
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */

int
nfs_asyncio(struct buf *bp)
{
	struct nfs_iod *iod;
	struct nfsmount *nmp;
	int slptimeo = 0, error;
	bool catch_p = false;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);

	if (nmp->nm_flag & NFSMNT_SOFT)
		slptimeo = nmp->nm_retry * nmp->nm_timeo;

	if (nmp->nm_iflag & NFSMNT_DISMNTFORCE)
		slptimeo = hz;

again:
	if (nmp->nm_flag & NFSMNT_INT)
		catch_p = true;

	/*
	 * Find a free iod to process this request.
	 */

	mutex_enter(&nfs_iodlist_lock);
	iod = LIST_FIRST(&nfs_iodlist_idle);
	if (iod) {
		/*
		 * Found one, so wake it up and tell it which
		 * mount to process.
		 */
		LIST_REMOVE(iod, nid_idle);
		mutex_enter(&iod->nid_lock);
		mutex_exit(&nfs_iodlist_lock);
		KASSERT(iod->nid_mount == NULL);
		iod->nid_mount = nmp;
		cv_signal(&iod->nid_cv);
		mutex_enter(&nmp->nm_lock);
		mutex_exit(&iod->nid_lock);
		nmp->nm_bufqiods++;
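		/*
		 * The queue may hold up to two buffers per active iod;
		 * if we are below that limit, wake any threads waiting
		 * for room in the wait loop below.
		 */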
		if (nmp->nm_bufqlen < 2 * nmp->nm_bufqiods) {
			cv_broadcast(&nmp->nm_aiocv);
		}
	} else {
		mutex_exit(&nfs_iodlist_lock);
		mutex_enter(&nmp->nm_lock);
	}

	KASSERT(mutex_owned(&nmp->nm_lock));

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer. However, even if we have an iod, do not initiate
	 * queue cleaning if curproc is the pageout daemon. If the NFS mount
	 * is via local loopback, we may put curproc (the pagedaemon) to sleep
	 * waiting for the writes to complete. But the server (ourselves) may
	 * block the write, waiting for its (i.e., our) pagedaemon
	 * to produce clean pages to handle the write: deadlock.
	 * XXX: start non-loopback mounts straight away? If "lots free",
	 * let pagedaemon start loopback writes anyway?
	 */
	if (nmp->nm_bufqiods > 0) {

		/*
		 * Ensure that the queue never grows too large.
		 */
		if (curlwp == uvm.pagedaemon_lwp) {
			/* Enqueue for later, to avoid free-page deadlock */
		} else while (nmp->nm_bufqlen >= 2 * nmp->nm_bufqiods) {
			if (catch_p) {
				error = cv_timedwait_sig(&nmp->nm_aiocv,
				    &nmp->nm_lock, slptimeo);
			} else {
				error = cv_timedwait(&nmp->nm_aiocv,
				    &nmp->nm_lock, slptimeo);
			}
			if (error) {
				if (error == EWOULDBLOCK &&
				    nmp->nm_flag & NFSMNT_SOFT) {
					mutex_exit(&nmp->nm_lock);
					bp->b_error = EIO;
					return (EIO);
				}

				if (nfs_sigintr(nmp, NULL, curlwp)) {
					mutex_exit(&nmp->nm_lock);
					return (EINTR);
				}
				if (catch_p) {
					catch_p = false;
					slptimeo = 2 * hz;
				}
			}

			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */

			if (nmp->nm_bufqiods == 0) {
				mutex_exit(&nmp->nm_lock);
				goto again;
			}
		}
		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		mutex_exit(&nmp->nm_lock);
		return (0);
	}
	mutex_exit(&nmp->nm_lock);

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */

	return (EIO);
}

/*
 * nfs_doio for read.
 */
static int
nfs_doio_read(struct buf *bp, struct uio *uiop)
{
	struct vnode *vp = bp->b_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0;

	uiop->uio_rw = UIO_READ;
	switch (vp->v_type) {
	case VREG:
		nfsstats.read_bios++;
		error = nfs_readrpc(vp, uiop);
		if (!error && uiop->uio_resid) {
			int diff, len;

			/*
			 * If uio_resid > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet or the file has been truncated
			 * on the server.
			 * Just zero fill the rest of the valid area.
			 */

			KASSERT(vp->v_size >=
			    uiop->uio_offset + uiop->uio_resid);
			diff = bp->b_bcount - uiop->uio_resid;
			len = uiop->uio_resid;
			memset((char *)bp->b_data + diff, 0, len);
			uiop->uio_resid = 0;
		}
#if 0
		if (uiop->uio_lwp && (vp->v_iflag & VI_TEXT) &&
		    timespeccmp(&np->n_mtime, &np->n_vattr->va_mtime, !=)) {
			mutex_enter(proc_lock);
			killproc(uiop->uio_lwp->l_proc,
			    "process text file was modified");
			mutex_exit(proc_lock);
#if 0 /* XXX NJWLWP */
			uiop->uio_lwp->l_proc->p_holdcnt++;
#endif
		}
#endif
		break;
	case VLNK:
		KASSERT(uiop->uio_offset == (off_t)0);
		nfsstats.readlink_bios++;
		error = nfs_readlinkrpc(vp, uiop, np->n_rcred);
		break;
	case VDIR:
		nfsstats.readdir_bios++;
		uiop->uio_offset = bp->b_dcookie;
#ifndef NFS_V2_ONLY
		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
			error = nfs_readdirplusrpc(vp, uiop,
			    curlwp->l_cred);
			/*
			 * nfs_request maps NFSERR_NOTSUPP to ENOTSUP.
			 */
			if (error == ENOTSUP)
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
		}
#else
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
#endif
		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
			error = nfs_readdirrpc(vp, uiop,
			    curlwp->l_cred);
		if (!error) {
			bp->b_dcookie = uiop->uio_offset;
		}
		break;
	default:
		printf("nfs_doio: type %x unexpected\n", vp->v_type);
		break;
	}
	bp->b_error = error;
	return error;
}

/*
 * nfs_doio for write.
 */
static int
nfs_doio_write(struct buf *bp, struct uio *uiop)
{
	struct vnode *vp = bp->b_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int iomode;
	bool stalewriteverf = false;
	int i, npages = (bp->b_bcount + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct vm_page **pgs, *spgs[UBC_MAX_PAGES];
#ifndef NFS_V2_ONLY
	bool needcommit = true;		/* need only COMMIT RPC */
#else
	bool needcommit = false;	/* need only COMMIT RPC */
#endif
	bool pageprotected;
	struct uvm_object *uobj = &vp->v_uobj;
	int error;
	off_t off, cnt;

	if (npages < __arraycount(spgs))
		pgs = spgs;
	else {
		if ((pgs = kmem_alloc(sizeof(*pgs) * npages, KM_NOSLEEP)) ==
		    NULL)
			return ENOMEM;
	}

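	/*
	 * NFSv3 async writes can go out UNSTABLE and be committed later;
	 * synchronous writes (and all NFSv2 writes) must be FILESYNC.
	 */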
	if ((bp->b_flags & B_ASYNC) != 0 && NFS_ISV3(vp)) {
		iomode = NFSV3WRITE_UNSTABLE;
	} else {
		iomode = NFSV3WRITE_FILESYNC;
	}

#ifndef NFS_V2_ONLY
again:
#endif
	rw_enter(&nmp->nm_writeverflock, RW_READER);

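	/*
	 * Inspect the pages backing the buffer.  A bare COMMIT suffices
	 * only if every page belongs to this vnode and already carries
	 * PG_NEEDCOMMIT from an earlier UNSTABLE write; otherwise a
	 * WRITE RPC is required.
	 */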
	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		if (pgs[i]->uobject == uobj &&
		    pgs[i]->offset == uiop->uio_offset + (i << PAGE_SHIFT)) {
			KASSERT(pgs[i]->flags & PG_BUSY);
			/*
			 * this page belongs to our object.
			 */
			mutex_enter(uobj->vmobjlock);
			/*
			 * write the page out stably if it's about to
			 * be released, because we can't resend it
			 * if the server crashes.
			 *
			 * XXX assuming PG_RELEASED|PG_PAGEOUT won't be
			 * changed until the page is unbusied.
			 */
			if (pgs[i]->flags & (PG_RELEASED|PG_PAGEOUT))
				iomode = NFSV3WRITE_FILESYNC;
			/*
			 * if we find a page which hasn't been sent yet,
			 * we need to do a WRITE RPC.
			 */
			if ((pgs[i]->flags & PG_NEEDCOMMIT) == 0)
				needcommit = false;
			mutex_exit(uobj->vmobjlock);
		} else {
			iomode = NFSV3WRITE_FILESYNC;
			needcommit = false;
		}
	}
	if (!needcommit && iomode == NFSV3WRITE_UNSTABLE) {
		mutex_enter(uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags |= PG_NEEDCOMMIT | PG_RDONLY;
			pmap_page_protect(pgs[i], VM_PROT_READ);
		}
		mutex_exit(uobj->vmobjlock);
		pageprotected = true; /* pages can't be modified during i/o. */
	} else
		pageprotected = false;

	/*
	 * Send the data to the server if necessary,
	 * otherwise just send a commit rpc.
	 */
#ifndef NFS_V2_ONLY
	if (needcommit) {

		/*
		 * If the buffer is in the range that we already committed,
		 * there's nothing to do.
		 *
		 * If it's in the range that we need to commit, push the
		 * whole range at once, otherwise only push the buffer.
		 * In both these cases, acquire the commit lock to avoid
		 * other processes modifying the range.
		 */

		off = uiop->uio_offset;
		cnt = bp->b_bcount;
		mutex_enter(&np->n_commitlock);
		if (!nfs_in_committed_range(vp, off, bp->b_bcount)) {
			bool pushedrange;
			if (nfs_in_tobecommitted_range(vp, off, bp->b_bcount)) {
				pushedrange = true;
				off = np->n_pushlo;
				cnt = np->n_pushhi - np->n_pushlo;
			} else {
				pushedrange = false;
			}
			error = nfs_commit(vp, off, cnt, curlwp);
			if (error == 0) {
				if (pushedrange) {
					nfs_merge_commit_ranges(vp);
				} else {
					nfs_add_committed_range(vp, off, cnt);
				}
			}
		} else {
			error = 0;
		}
		mutex_exit(&np->n_commitlock);
		rw_exit(&nmp->nm_writeverflock);
		if (!error) {
			/*
			 * pages are now on stable storage.
			 */
			uiop->uio_resid = 0;
			mutex_enter(uobj->vmobjlock);
			for (i = 0; i < npages; i++) {
				pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
			}
			mutex_exit(uobj->vmobjlock);
			goto out;
		} else if (error == NFSERR_STALEWRITEVERF) {
			nfs_clearcommit(vp->v_mount);
			goto again;
		}
		if (error) {
			bp->b_error = np->n_error = error;
			np->n_flag |= NWRITEERR;
		}
		goto out;
	}
#endif
	off = uiop->uio_offset;
	cnt = bp->b_bcount;
	uiop->uio_rw = UIO_WRITE;
	nfsstats.write_bios++;
	error = nfs_writerpc(vp, uiop, &iomode, pageprotected, &stalewriteverf);
#ifndef NFS_V2_ONLY
	if (!error && iomode == NFSV3WRITE_UNSTABLE) {
		/*
		 * we need to commit pages later.
		 */
		mutex_enter(&np->n_commitlock);
		nfs_add_tobecommitted_range(vp, off, cnt);
		/*
		 * if too many uncommitted pages have accumulated,
		 * commit some of them now.
		 */
		if (np->n_pushhi - np->n_pushlo > nfs_commitsize) {
			off = np->n_pushlo;
			cnt = nfs_commitsize >> 1;
			error = nfs_commit(vp, off, cnt, curlwp);
			if (!error) {
				nfs_add_committed_range(vp, off, cnt);
				nfs_del_tobecommitted_range(vp, off, cnt);
			}
			if (error == NFSERR_STALEWRITEVERF) {
				stalewriteverf = true;
				error = 0; /* it isn't a real error */
			}
		} else {
			/*
			 * re-dirty pages so that they will be passed
			 * to us again later.
			 */
			mutex_enter(uobj->vmobjlock);
			for (i = 0; i < npages; i++) {
				pgs[i]->flags &= ~PG_CLEAN;
			}
			mutex_exit(uobj->vmobjlock);
		}
		mutex_exit(&np->n_commitlock);
	} else
#endif
	if (!error) {
		/*
		 * pages are now on stable storage.
		 */
		mutex_enter(&np->n_commitlock);
		nfs_del_committed_range(vp, off, cnt);
		mutex_exit(&np->n_commitlock);
		mutex_enter(uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
		}
		mutex_exit(uobj->vmobjlock);
	} else {
		/*
		 * we got an error.
		 */
		bp->b_error = np->n_error = error;
		np->n_flag |= NWRITEERR;
	}

	rw_exit(&nmp->nm_writeverflock);

	if (stalewriteverf) {
		nfs_clearcommit(vp->v_mount);
	}
#ifndef NFS_V2_ONLY
out:
#endif
	if (pgs != spgs)
		kmem_free(pgs, sizeof(*pgs) * npages);
	return error;
}

/*
 * nfs_doio for B_PHYS.
 */
static int
nfs_doio_phys(struct buf *bp, struct uio *uiop)
{
	struct vnode *vp = bp->b_vp;
	int error;

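	/* bp->b_blkno is in DEV_BSIZE units; convert it to a byte offset. */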
	uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
	if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop);
	} else {
		int iomode = NFSV3WRITE_DATASYNC;
		bool stalewriteverf;
		struct nfsmount *nmp = VFSTONFS(vp->v_mount);

		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		rw_enter(&nmp->nm_writeverflock, RW_READER);
		error = nfs_writerpc(vp, uiop, &iomode, false, &stalewriteverf);
		rw_exit(&nmp->nm_writeverflock);
		if (stalewriteverf) {
			nfs_clearcommit(bp->b_vp->v_mount);
		}
	}
	bp->b_error = error;
	return error;
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(struct buf *bp)
{
	int error;
	struct uio uio;
	struct uio *uiop = &uio;
	struct iovec io;
	UVMHIST_FUNC("nfs_doio"); UVMHIST_CALLED(ubchist);

	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_offset = (((off_t)bp->b_blkno) << DEV_BSHIFT);
	UIO_SETUP_SYSSPACE(uiop);
	io.iov_base = bp->b_data;
	io.iov_len = uiop->uio_resid = bp->b_bcount;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		error = nfs_doio_phys(bp, uiop);
	} else if (bp->b_flags & B_READ) {
		error = nfs_doio_read(bp, uiop);
	} else {
		error = nfs_doio_write(bp, uiop);
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}

/*
 * Vnode op for VM getpages.
 */

int
nfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uobj;
	struct nfsnode *np = VTONFS(vp);
	const int npages = *ap->a_count;
	struct vm_page *pg, **pgs, **opgs, *spgs[UBC_MAX_PAGES];
	off_t origoffset, len;
	int i, error;
	bool v3 = NFS_ISV3(vp);
	bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	bool locked = (ap->a_flags & PGO_LOCKED) != 0;

	/*
	 * If we are not locked we are not really using opgs,
	 * so just initialize it
	 */
	if (!locked || npages < __arraycount(spgs))
		opgs = spgs;
	else {
		if ((opgs = kmem_alloc(npages * sizeof(*opgs), KM_NOSLEEP)) ==
		    NULL)
			return ENOMEM;
	}

	/*
	 * call the genfs code to get the pages.  `pgs' may be NULL
	 * when doing read-ahead.
	 */
	pgs = ap->a_m;
	if (write && locked && v3) {
		KASSERT(pgs != NULL);
#ifdef DEBUG

		/*
		 * If PGO_LOCKED is set, real pages shouldn't exist
		 * in the array.
		 */

		for (i = 0; i < npages; i++)
			KDASSERT(pgs[i] == NULL || pgs[i] == PGO_DONTCARE);
#endif
		memcpy(opgs, pgs, npages * sizeof(struct vm_page *));
	}
	error = genfs_getpages(v);
	if (error)
		goto out;

	/*
	 * for read faults where the nfs node is not yet marked NMODIFIED,
	 * set PG_RDONLY on the pages so that we come back here if someone
	 * tries to modify later via the mapping that will be entered for
	 * this fault.
	 */

	if (!write && (np->n_flag & NMODIFIED) == 0 && pgs != NULL) {
		if (!locked) {
			mutex_enter(uobj->vmobjlock);
		}
		for (i = 0; i < npages; i++) {
			pg = pgs[i];
			if (pg == NULL || pg == PGO_DONTCARE) {
				continue;
			}
			pg->flags |= PG_RDONLY;
		}
		if (!locked) {
			mutex_exit(uobj->vmobjlock);
		}
	}
	if (!write)
		goto out;

	/*
	 * this is a write fault, update the commit info.
	 */

	origoffset = ap->a_offset;
	len = npages << PAGE_SHIFT;

	if (v3) {
		if (!locked) {
			mutex_enter(&np->n_commitlock);
		} else {
			if (!mutex_tryenter(&np->n_commitlock)) {

				/*
				 * Since PGO_LOCKED is set, we need to unbusy
				 * all pages fetched by genfs_getpages() above,
				 * tell the caller that there are no pages
				 * available and put back the original pgs
				 * array.
				 */

				mutex_enter(&uvm_pageqlock);
				uvm_page_unbusy(pgs, npages);
				mutex_exit(&uvm_pageqlock);
				*ap->a_count = 0;
				memcpy(pgs, opgs,
				    npages * sizeof(struct vm_page *));
				error = EBUSY;
				goto out;
			}
		}
		nfs_del_committed_range(vp, origoffset, len);
		nfs_del_tobecommitted_range(vp, origoffset, len);
	}
	np->n_flag |= NMODIFIED;
	if (!locked) {
		mutex_enter(uobj->vmobjlock);
	}
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}
		pg->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
	}
	if (!locked) {
		mutex_exit(uobj->vmobjlock);
	}
	if (v3) {
		mutex_exit(&np->n_commitlock);
	}
out:
	if (opgs != spgs)
		kmem_free(opgs, sizeof(*opgs) * npages);
	return error;
}