/*	$NetBSD: nfs_bio.c,v 1.191 2015/07/15 03:28:55 manu Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.191 2015/07/15 03:28:55 manu Exp $");

#ifdef _KERNEL_OPT
#include "opt_nfs.h"
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/dirent.h>
#include <sys/kauth.h>
#include <sys/kmem.h>	/* kmem_alloc/kmem_free are used below */

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern int nfs_commitsize;
extern struct nfsstats nfsstats;

static int nfs_doio_read(struct buf *, struct uio *);
static int nfs_doio_write(struct buf *, struct uio *);
static int nfs_doio_phys(struct buf *, struct uio *);

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag,
    kauth_cred_t cred, int cflag)
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp = NULL, *rabp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsdircache *ndp = NULL, *nndp = NULL;
	void *baddr;
	int got_buf = 0, error = 0, n = 0, on = 0, en, enn;
	int enough = 0;
	struct dirent *dp, *pdp, *edp, *ep;
	off_t curoff = 0;
	int advice;
	struct lwp *l = curlwp;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (vp->v_type != VDIR && uio->uio_offset < 0)
		return (EINVAL);
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, l);
#endif
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);

	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 *
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * nfs_attrtimeo seconds out of date.  If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */

	if (vp->v_type != VLNK) {
		error = nfs_flushstalebuf(vp, cred, l,
		    NFS_FLUSHSTALEBUF_MYWRITE);
		if (error)
			return error;
	}

	do {
		/*
		 * Don't cache symlinks that are the root of the mount;
		 * read them from the server each time.
		 */
		if ((vp->v_vflag & VV_ROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (void *)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;

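			/*
			 * Regular-file reads are served straight from the
			 * page cache via ubc_uiomove(); each pass below
			 * copies at most the remainder of the file, and
			 * nfs_delayedtruncate() applies any truncation
			 * that had to be deferred earlier.
			 */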
			advice = IO_ADV_DECODE(ioflag);
			error = 0;
			while (uio->uio_resid > 0) {
				vsize_t bytelen;

				nfs_delayedtruncate(vp);
				if (np->n_size <= uio->uio_offset) {
					break;
				}
				bytelen =
				    MIN(np->n_size - uio->uio_offset, uio->uio_resid);
				error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
				    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
				if (error) {
					/*
					 * XXXkludge
					 * the file has been truncated on the server.
					 * there isn't much we can do.
					 */
					if (uio->uio_offset >= np->n_size) {
						/* end of file */
						error = 0;
					} else {
						break;
					}
				}
			}
			break;

		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, MAXPATHLEN, l);
			if (!bp)
				return (EINTR);
			if ((bp->b_oflags & BO_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
			}
			n = MIN(uio->uio_resid, MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
diragain:
			nfsstats.biocache_readdirs++;
			ndp = nfs_searchdircache(vp, uio->uio_offset,
			    (nmp->nm_flag & NFSMNT_XLATECOOKIE), 0);
			if (!ndp) {
				/*
				 * We've been handed a cookie that is not
				 * in the cache. If we're not translating
				 * 32 <-> 64, it may be a value that was
				 * flushed out of the cache because it grew
				 * too big. Let the server judge if it's
				 * valid or not. In the translation case,
				 * we have no way of validating this value,
				 * so punt.
				 */
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
					return (EINVAL);
				ndp = nfs_enterdircache(vp, uio->uio_offset,
				    uio->uio_offset, 0, 0);
			}

			if (NFS_EOFVALID(np) &&
			    ndp->dc_cookie == np->n_direofoffset) {
				nfs_putdircache(np, ndp);
				nfsstats.direofcache_hits++;
				return (0);
			}

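			/*
			 * Directory data is cached in NFS_DIRBLKSIZ blocks;
			 * NFSDC_BLKNO(ndp) names the cache block, and
			 * dc_blkcookie is the server cookie at which the
			 * READDIR for that block starts.
			 */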
			bp = nfs_getcacheblk(vp, NFSDC_BLKNO(ndp), NFS_DIRBLKSIZ, l);
			if (!bp)
				return (EINTR);
			if ((bp->b_oflags & BO_DONE) == 0) {
				bp->b_flags |= B_READ;
				bp->b_dcookie = ndp->dc_blkcookie;
				error = nfs_doio(bp);
				if (error) {
					/*
					 * Yuck! The directory has been modified on the
					 * server. Punt and let the userland code
					 * deal with it.
					 */
					nfs_putdircache(np, ndp);
					brelse(bp, 0);
					/*
					 * nfs_request maps NFSERR_BAD_COOKIE to EINVAL.
					 */
					if (error == EINVAL) { /* NFSERR_BAD_COOKIE */
						nfs_invaldircache(vp, 0);
						nfs_vinvalbuf(vp, 0, cred, l, 1);
					}
					return (error);
				}
			}

			/*
			 * Just return if we hit EOF right away with this
			 * block. Always check here, because direofoffset
			 * may have been set by an nfsiod since the last
			 * check.
			 *
			 * also, empty block implies EOF.
			 */

			if (bp->b_bcount == bp->b_resid ||
			    (NFS_EOFVALID(np) &&
			    ndp->dc_blkcookie == np->n_direofoffset)) {
				KASSERT(bp->b_bcount != bp->b_resid ||
				    ndp->dc_blkcookie == bp->b_dcookie);
				nfs_putdircache(np, ndp);
				brelse(bp, BC_NOCACHE);
				return 0;
			}

			/*
			 * Find the entry we were looking for in the block.
			 */

			en = ndp->dc_entry;

			pdp = dp = (struct dirent *)bp->b_data;
			edp = (struct dirent *)(void *)((char *)bp->b_data +
			    bp->b_bcount - bp->b_resid);
			enn = 0;
			while (enn < en && dp < edp) {
				pdp = dp;
				dp = _DIRENT_NEXT(dp);
				enn++;
			}

			/*
			 * If the entry number was bigger than the number of
			 * entries in the block, or the cookie of the previous
			 * entry doesn't match, the directory cache is
			 * stale. Flush it and try again (i.e. go to
			 * the server).
			 */
			if (dp >= edp || (struct dirent *)_DIRENT_NEXT(dp) > edp ||
			    (en > 0 && NFS_GETCOOKIE(pdp) != ndp->dc_cookie)) {
#ifdef DEBUG
				printf("invalid cache: %p %p %p off %jx %jx\n",
				    pdp, dp, edp,
				    (uintmax_t)uio->uio_offset,
				    (uintmax_t)NFS_GETCOOKIE(pdp));
#endif
				nfs_putdircache(np, ndp);
				brelse(bp, 0);
				nfs_invaldircache(vp, 0);
				nfs_vinvalbuf(vp, 0, cred, l, 0);
				goto diragain;
			}

			on = (char *)dp - (char *)bp->b_data;

			/*
			 * Cache all entries that may be exported to the
			 * user, as they may be thrown back at us. The
			 * NFSBIO_CACHECOOKIES flag indicates that all
			 * entries are being 'exported', so cache them all.
			 */

			if (en == 0 && pdp == dp) {
				dp = _DIRENT_NEXT(dp);
				enn++;
			}

			if (uio->uio_resid < (bp->b_bcount - bp->b_resid - on)) {
				n = uio->uio_resid;
				enough = 1;
			} else
				n = bp->b_bcount - bp->b_resid - on;

			ep = (struct dirent *)(void *)((char *)bp->b_data + on + n);

			/*
			 * Find last complete entry to copy, caching entries
			 * (if requested) as we go.
			 */

			while (dp < ep && (struct dirent *)_DIRENT_NEXT(dp) <= ep) {
				if (cflag & NFSBIO_CACHECOOKIES) {
					nndp = nfs_enterdircache(vp, NFS_GETCOOKIE(pdp),
					    ndp->dc_blkcookie, enn, bp->b_lblkno);
					if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
						NFS_STASHCOOKIE32(pdp,
						    nndp->dc_cookie32);
					}
					nfs_putdircache(np, nndp);
				}
				pdp = dp;
				dp = _DIRENT_NEXT(dp);
				enn++;
			}
			nfs_putdircache(np, ndp);

			/*
			 * If the last requested entry was not the last in the
			 * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),
			 * cache the cookie of the last requested one, and
			 * set the offset to it.
			 */

			if ((on + n) < bp->b_bcount - bp->b_resid) {
				curoff = NFS_GETCOOKIE(pdp);
				nndp = nfs_enterdircache(vp, curoff, ndp->dc_blkcookie,
				    enn, bp->b_lblkno);
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
					NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
				nfs_putdircache(np, nndp);
			} else
				curoff = bp->b_dcookie;

			/*
			 * Always cache the entry for the next block,
			 * so that readaheads can use it.
			 */
			nndp = nfs_enterdircache(vp, bp->b_dcookie, bp->b_dcookie, 0, 0);
			if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
				if (curoff == bp->b_dcookie) {
					NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
			}

			n = (char *)_DIRENT_NEXT(pdp) - ((char *)bp->b_data + on);

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have the
			 * directory offset cookie of the next block.)
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    !NFS_EOFVALID(np)) {
				rabp = nfs_getcacheblk(vp, NFSDC_BLKNO(nndp),
				    NFS_DIRBLKSIZ, l);
				if (rabp) {
					if ((rabp->b_oflags & (BO_DONE | BO_DELWRI)) == 0) {
						rabp->b_dcookie = nndp->dc_cookie;
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp)) {
							brelse(rabp, BC_INVAL);
						}
					} else
						brelse(rabp, 0);
				}
			}
			nfs_putdircache(np, nndp);
			got_buf = 1;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove((char *)baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			uio->uio_offset = curoff;
			if (enough)
				n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp, 0);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	kauth_cred_t cred = ap->a_cred;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	voff_t oldoff, origoff;
	vsize_t bytelen;
	int error = 0;
	int ioflag = ap->a_ioflag;
	int extended = 0, wrotedata = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, l);
#endif
	if (ioflag & IO_APPEND) {
		NFS_INVALIDATE_ATTRCACHE(np);
		error = nfs_flushstalebuf(vp, cred, l,
		    NFS_FLUSHSTALEBUF_MYWRITE);
		if (error)
			return (error);
		uio->uio_offset = np->n_size;

		/*
		 * This is already checked above VOP_WRITE, but recheck
		 * the append case here to make sure our idea of the
		 * file size is as fresh as possible.
		 */
		if (uio->uio_offset + uio->uio_resid >
		    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
			mutex_enter(proc_lock);
			psignal(l->l_proc, SIGXFSZ);
			mutex_exit(proc_lock);
			return (EFBIG);
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);

	origoff = uio->uio_offset;
	do {
		bool overwrite;	/* if we are overwriting whole pages */
		u_quad_t oldsize;
		oldoff = uio->uio_offset;
		bytelen = uio->uio_resid;

		nfsstats.biocache_writes++;

		oldsize = np->n_size;
		np->n_flag |= NMODIFIED;
		if (np->n_size < uio->uio_offset + bytelen) {
			np->n_size = uio->uio_offset + bytelen;
		}
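		/*
		 * Decide whether this chunk overwrites whole pages.
		 * If so, UBC_FAULTBUSY below lets UBC provide the pages
		 * without first reading their old contents from the
		 * server, since every byte of them will be rewritten.
		 */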
		overwrite = false;
		if ((uio->uio_offset & PAGE_MASK) == 0) {
			if ((vp->v_vflag & VV_MAPPED) == 0 &&
			    bytelen > PAGE_SIZE) {
				bytelen = trunc_page(bytelen);
				overwrite = true;
			} else if ((bytelen & PAGE_MASK) == 0 &&
			    uio->uio_offset >= vp->v_size) {
				overwrite = true;
			}
		}
		if (vp->v_size < uio->uio_offset + bytelen) {
			uvm_vnp_setwritesize(vp, uio->uio_offset + bytelen);
		}
		error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
		    UVM_ADV_RANDOM, UBC_WRITE | UBC_PARTIALOK |
		    (overwrite ? UBC_FAULTBUSY : 0) |
		    UBC_UNMAP_FLAG(vp));
		if (error) {
			uvm_vnp_setwritesize(vp, vp->v_size);
			if (overwrite && np->n_size != oldsize) {
				/*
				 * backout size and free pages past eof.
				 */
				np->n_size = oldsize;
				mutex_enter(vp->v_interlock);
				(void)VOP_PUTPAGES(vp, round_page(vp->v_size),
				    0, PGO_SYNCIO | PGO_FREE);
			}
			break;
		}
		wrotedata = 1;

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 */

		if (vp->v_size < uio->uio_offset) {
			uvm_vnp_setsize(vp, uio->uio_offset);
			extended = 1;
		}

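		/*
		 * If this pass crossed into a new nm_wsize-aligned window,
		 * start cleaning the previous window's pages, so that dirty
		 * data is pushed to the server in server-sized chunks
		 * instead of accumulating until the file is flushed.
		 */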
		if ((oldoff & ~(nmp->nm_wsize - 1)) !=
		    (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
			mutex_enter(vp->v_interlock);
			error = VOP_PUTPAGES(vp,
			    trunc_page(oldoff & ~(nmp->nm_wsize - 1)),
			    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
			    ~(nmp->nm_wsize - 1)), PGO_CLEANIT);
		}
	} while (uio->uio_resid > 0);
	if (wrotedata)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error == 0 && (ioflag & IO_SYNC) != 0) {
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp,
		    trunc_page(origoff & ~(nmp->nm_wsize - 1)),
		    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
		    ~(nmp->nm_wsize - 1)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	return error;
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct lwp *l)
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

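	/*
	 * On interruptible mounts, poll getblk() with a timeout so that
	 * a pending signal can abort the wait for a busy buffer.
	 */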
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, l))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred,
    struct lwp *l, int intrflg)
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, allerror = 0, slptimeo;
	bool catch_p;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		catch_p = true;
		slptimeo = 2 * hz;
	} else {
		catch_p = false;
		if (nmp->nm_flag & NFSMNT_SOFT)
			slptimeo = nmp->nm_retry * nmp->nm_timeo;
		else
			slptimeo = 0;
	}
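	/*
	 * On soft mounts the sleeps below are bounded by the mount's
	 * retransmit parameters, so they wake up periodically instead
	 * of blocking forever on a dead server.
	 */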
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	mutex_enter(vp->v_interlock);
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = mtsleep(&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo, vp->v_interlock);
		if (error && intrflg && nfs_sigintr(nmp, NULL, l)) {
			mutex_exit(vp->v_interlock);
			return EINTR;
		}
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	mutex_exit(vp->v_interlock);
	error = vinvalbuf(vp, flags, cred, l, catch_p, 0);
	while (error) {
		if (allerror == 0)
			allerror = error;
		if (intrflg && nfs_sigintr(nmp, NULL, l)) {
			error = EINTR;
			break;
		}
		error = vinvalbuf(vp, flags, cred, l, 0, slptimeo);
	}
	mutex_enter(vp->v_interlock);
	if (allerror != 0) {
		/*
		 * Keep error from vinvalbuf so fsync/close will know.
		 */
		np->n_error = allerror;
		np->n_flag |= NWRITEERR;
	}
	if (error == 0)
		np->n_flag &= ~NMODIFIED;
	np->n_flag &= ~NFLUSHINPROG;
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	mutex_exit(vp->v_interlock);
	return error;
}

/*
 * nfs_flushstalebuf: flush cache if it's stale.
 *
 * => caller shouldn't own any pages or buffers which belong to the vnode.
 */

int
nfs_flushstalebuf(struct vnode *vp, kauth_cred_t cred, struct lwp *l,
    int flags)
{
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;

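	/*
	 * Two cases: if we have modified the file locally, flush our
	 * dirty data (unless NFS_FLUSHSTALEBUF_MYWRITE says the writes
	 * are our own) and refetch the attributes.  Otherwise compare
	 * the server's mtime with the cached one and invalidate the
	 * cache if the file changed behind our back.
	 */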
	if (np->n_flag & NMODIFIED) {
		if ((flags & NFS_FLUSHSTALEBUF_MYWRITE) == 0
		    || vp->v_type != VREG) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
			if (error)
				return error;
			if (vp->v_type == VDIR) {
				nfs_invaldircache(vp, 0);
			}
		} else {
			/*
			 * XXX assuming writes are ours.
			 */
		}
		NFS_INVALIDATE_ATTRCACHE(np);
		error = VOP_GETATTR(vp, &vattr, cred);
		if (error)
			return error;
		np->n_mtime = vattr.va_mtime;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred);
		if (error)
			return error;
		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
			if (vp->v_type == VDIR) {
				nfs_invaldircache(vp, 0);
			}
			error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
			if (error)
				return error;
			np->n_mtime = vattr.va_mtime;
		}
	}

	return error;
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */

int
nfs_asyncio(struct buf *bp)
{
	struct nfs_iod *iod;
	struct nfsmount *nmp;
	int slptimeo = 0, error;
	bool catch_p = false;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);

	if (nmp->nm_flag & NFSMNT_SOFT)
		slptimeo = nmp->nm_retry * nmp->nm_timeo;

	if (nmp->nm_iflag & NFSMNT_DISMNTFORCE)
		slptimeo = hz;

again:
	if (nmp->nm_flag & NFSMNT_INT)
		catch_p = true;

	/*
	 * Find a free iod to process this request.
	 */

	mutex_enter(&nfs_iodlist_lock);
	iod = LIST_FIRST(&nfs_iodlist_idle);
	if (iod) {
		/*
		 * Found one, so wake it up and tell it which
		 * mount to process.
		 */
		LIST_REMOVE(iod, nid_idle);
		mutex_enter(&iod->nid_lock);
		mutex_exit(&nfs_iodlist_lock);
		KASSERT(iod->nid_mount == NULL);
		iod->nid_mount = nmp;
		cv_signal(&iod->nid_cv);
		mutex_enter(&nmp->nm_lock);
		mutex_exit(&iod->nid_lock);
		nmp->nm_bufqiods++;
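		/*
		 * The queue may hold up to two buffers per active iod;
		 * if we are below that limit, wake any threads waiting
		 * for room in the wait loop below.
		 */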
		if (nmp->nm_bufqlen < 2 * nmp->nm_bufqiods) {
			cv_broadcast(&nmp->nm_aiocv);
		}
	} else {
		mutex_exit(&nfs_iodlist_lock);
		mutex_enter(&nmp->nm_lock);
	}

	KASSERT(mutex_owned(&nmp->nm_lock));

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer. However, even if we have an iod, do not initiate
	 * queue cleaning if curproc is the pageout daemon. If the NFS mount
	 * is via local loopback, we may put curproc (the pagedaemon) to sleep
	 * waiting for the writes to complete. But the server (ourselves) may
	 * block the write, waiting for its (i.e., our) pagedaemon
	 * to produce clean pages to handle the write: deadlock.
	 * XXX: start non-loopback mounts straight away? If "lots free",
	 * let pagedaemon start loopback writes anyway?
	 */
	if (nmp->nm_bufqiods > 0) {

		/*
		 * Ensure that the queue never grows too large.
		 */
		if (curlwp == uvm.pagedaemon_lwp) {
			/* Enqueue for later, to avoid free-page deadlock */
		} else while (nmp->nm_bufqlen >= 2 * nmp->nm_bufqiods) {
			if (catch_p) {
				error = cv_timedwait_sig(&nmp->nm_aiocv,
				    &nmp->nm_lock, slptimeo);
			} else {
				error = cv_timedwait(&nmp->nm_aiocv,
				    &nmp->nm_lock, slptimeo);
			}
			if (error) {
				if (error == EWOULDBLOCK &&
				    nmp->nm_flag & NFSMNT_SOFT) {
					mutex_exit(&nmp->nm_lock);
					bp->b_error = EIO;
					return (EIO);
				}

				if (nfs_sigintr(nmp, NULL, curlwp)) {
					mutex_exit(&nmp->nm_lock);
					return (EINTR);
				}
				if (catch_p) {
					catch_p = false;
					slptimeo = 2 * hz;
				}
			}

			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */

			if (nmp->nm_bufqiods == 0) {
				mutex_exit(&nmp->nm_lock);
				goto again;
			}
		}
		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		mutex_exit(&nmp->nm_lock);
		return (0);
	}
	mutex_exit(&nmp->nm_lock);

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */

	return (EIO);
}

/*
 * nfs_doio for read.
 */
static int
nfs_doio_read(struct buf *bp, struct uio *uiop)
{
	struct vnode *vp = bp->b_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0;

	uiop->uio_rw = UIO_READ;
	switch (vp->v_type) {
	case VREG:
		nfsstats.read_bios++;
		error = nfs_readrpc(vp, uiop);
		if (!error && uiop->uio_resid) {
			int diff, len;

			/*
			 * If uio_resid > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet or the file has been truncated
			 * on the server.
			 * Just zero fill the rest of the valid area.
			 */

			KASSERT(vp->v_size >=
			    uiop->uio_offset + uiop->uio_resid);
			diff = bp->b_bcount - uiop->uio_resid;
			len = uiop->uio_resid;
			memset((char *)bp->b_data + diff, 0, len);
			uiop->uio_resid = 0;
		}
#if 0
		if (uiop->uio_lwp && (vp->v_iflag & VI_TEXT) &&
		    timespeccmp(&np->n_mtime, &np->n_vattr->va_mtime, !=)) {
			mutex_enter(proc_lock);
			killproc(uiop->uio_lwp->l_proc,
			    "process text file was modified");
			mutex_exit(proc_lock);
#if 0 /* XXX NJWLWP */
			uiop->uio_lwp->l_proc->p_holdcnt++;
#endif
		}
#endif
		break;
	case VLNK:
		KASSERT(uiop->uio_offset == (off_t)0);
		nfsstats.readlink_bios++;
		error = nfs_readlinkrpc(vp, uiop, np->n_rcred);
		break;
	case VDIR:
		nfsstats.readdir_bios++;
		uiop->uio_offset = bp->b_dcookie;
#ifndef NFS_V2_ONLY
		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
			error = nfs_readdirplusrpc(vp, uiop,
			    curlwp->l_cred);
			/*
			 * nfs_request maps NFSERR_NOTSUPP to ENOTSUP.
			 */
			if (error == ENOTSUP)
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
		}
#else
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
#endif
		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
			error = nfs_readdirrpc(vp, uiop,
			    curlwp->l_cred);
		if (!error) {
			bp->b_dcookie = uiop->uio_offset;
		}
		break;
	default:
		printf("nfs_doio: type %x unexpected\n", vp->v_type);
		break;
	}
	bp->b_error = error;
	return error;
}

/*
 * nfs_doio for write.
 */
static int
nfs_doio_write(struct buf *bp, struct uio *uiop)
{
	struct vnode *vp = bp->b_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int iomode;
	bool stalewriteverf = false;
	int i, npages = (bp->b_bcount + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct vm_page **pgs, *spgs[UBC_MAX_PAGES];
#ifndef NFS_V2_ONLY
	bool needcommit = true;		/* need only COMMIT RPC */
#else
	bool needcommit = false;	/* need only COMMIT RPC */
#endif
	bool pageprotected;
	struct uvm_object *uobj = &vp->v_uobj;
	int error;
	off_t off, cnt;

	if (npages < __arraycount(spgs))
		pgs = spgs;
	else {
		if ((pgs = kmem_alloc(sizeof(*pgs) * npages, KM_NOSLEEP)) ==
		    NULL)
			return ENOMEM;
	}

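	/*
	 * NFSv3 async writes can go out UNSTABLE and be committed later;
	 * synchronous writes (and all NFSv2 writes) must be FILESYNC.
	 */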
	if ((bp->b_flags & B_ASYNC) != 0 && NFS_ISV3(vp)) {
		iomode = NFSV3WRITE_UNSTABLE;
	} else {
		iomode = NFSV3WRITE_FILESYNC;
	}

#ifndef NFS_V2_ONLY
again:
#endif
	rw_enter(&nmp->nm_writeverflock, RW_READER);

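	/*
	 * Inspect the pages backing the buffer.  A bare COMMIT suffices
	 * only if every page belongs to this vnode and already carries
	 * PG_NEEDCOMMIT from an earlier UNSTABLE write; otherwise a
	 * WRITE RPC is required.
	 */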
	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		if (pgs[i]->uobject == uobj &&
		    pgs[i]->offset == uiop->uio_offset + (i << PAGE_SHIFT)) {
			KASSERT(pgs[i]->flags & PG_BUSY);
			/*
			 * this page belongs to our object.
			 */
			mutex_enter(uobj->vmobjlock);
			/*
			 * write the page out stably if it's about to
			 * be released, because we can't resend it
			 * if the server crashes.
			 *
			 * XXX assuming PG_RELEASED|PG_PAGEOUT won't be
			 * changed until the page is unbusied.
			 */
			if (pgs[i]->flags & (PG_RELEASED|PG_PAGEOUT))
				iomode = NFSV3WRITE_FILESYNC;
			/*
			 * if we find a page which hasn't been sent yet,
			 * we need to do a WRITE RPC.
			 */
			if ((pgs[i]->flags & PG_NEEDCOMMIT) == 0)
				needcommit = false;
			mutex_exit(uobj->vmobjlock);
		} else {
			iomode = NFSV3WRITE_FILESYNC;
			needcommit = false;
		}
	}
	if (!needcommit && iomode == NFSV3WRITE_UNSTABLE) {
		mutex_enter(uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags |= PG_NEEDCOMMIT | PG_RDONLY;
			pmap_page_protect(pgs[i], VM_PROT_READ);
		}
		mutex_exit(uobj->vmobjlock);
		pageprotected = true; /* pages can't be modified during i/o. */
	} else
		pageprotected = false;

	/*
	 * Send the data to the server if necessary,
	 * otherwise just send a commit rpc.
	 */
#ifndef NFS_V2_ONLY
	if (needcommit) {

		/*
		 * If the buffer is in the range that we already committed,
		 * there's nothing to do.
		 *
		 * If it's in the range that we need to commit, push the
		 * whole range at once, otherwise only push the buffer.
		 * In both these cases, acquire the commit lock to avoid
		 * other processes modifying the range.
		 */

		off = uiop->uio_offset;
		cnt = bp->b_bcount;
		mutex_enter(&np->n_commitlock);
		if (!nfs_in_committed_range(vp, off, bp->b_bcount)) {
			bool pushedrange;
			if (nfs_in_tobecommitted_range(vp, off, bp->b_bcount)) {
				pushedrange = true;
				off = np->n_pushlo;
				cnt = np->n_pushhi - np->n_pushlo;
			} else {
				pushedrange = false;
			}
			error = nfs_commit(vp, off, cnt, curlwp);
			if (error == 0) {
				if (pushedrange) {
					nfs_merge_commit_ranges(vp);
				} else {
					nfs_add_committed_range(vp, off, cnt);
				}
			}
		} else {
			error = 0;
		}
		mutex_exit(&np->n_commitlock);
		rw_exit(&nmp->nm_writeverflock);
		if (!error) {
			/*
			 * pages are now on stable storage.
			 */
			uiop->uio_resid = 0;
			mutex_enter(uobj->vmobjlock);
			for (i = 0; i < npages; i++) {
				pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
			}
			mutex_exit(uobj->vmobjlock);
			goto out;
		} else if (error == NFSERR_STALEWRITEVERF) {
			nfs_clearcommit(vp->v_mount);
			goto again;
		}
		if (error) {
			bp->b_error = np->n_error = error;
			np->n_flag |= NWRITEERR;
		}
		goto out;
	}
#endif
	off = uiop->uio_offset;
	cnt = bp->b_bcount;
	uiop->uio_rw = UIO_WRITE;
	nfsstats.write_bios++;
	error = nfs_writerpc(vp, uiop, &iomode, pageprotected, &stalewriteverf);
#ifndef NFS_V2_ONLY
	if (!error && iomode == NFSV3WRITE_UNSTABLE) {
		/*
		 * we need to commit pages later.
		 */
		mutex_enter(&np->n_commitlock);
		nfs_add_tobecommitted_range(vp, off, cnt);
		/*
		 * if too many uncommitted pages have accumulated,
		 * commit some of them now.
		 */
		if (np->n_pushhi - np->n_pushlo > nfs_commitsize) {
			off = np->n_pushlo;
			cnt = nfs_commitsize >> 1;
			error = nfs_commit(vp, off, cnt, curlwp);
			if (!error) {
				nfs_add_committed_range(vp, off, cnt);
				nfs_del_tobecommitted_range(vp, off, cnt);
			}
			if (error == NFSERR_STALEWRITEVERF) {
				stalewriteverf = true;
				error = 0; /* it isn't a real error */
			}
		} else {
			/*
			 * re-dirty pages so that they will be passed
			 * to us again later.
			 */
			mutex_enter(uobj->vmobjlock);
			for (i = 0; i < npages; i++) {
				pgs[i]->flags &= ~PG_CLEAN;
			}
			mutex_exit(uobj->vmobjlock);
		}
		mutex_exit(&np->n_commitlock);
	} else
#endif
	if (!error) {
		/*
		 * pages are now on stable storage.
		 */
		mutex_enter(&np->n_commitlock);
		nfs_del_committed_range(vp, off, cnt);
		mutex_exit(&np->n_commitlock);
		mutex_enter(uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
		}
		mutex_exit(uobj->vmobjlock);
	} else {
		/*
		 * we got an error.
		 */
		bp->b_error = np->n_error = error;
		np->n_flag |= NWRITEERR;
	}

	rw_exit(&nmp->nm_writeverflock);

	if (stalewriteverf) {
		nfs_clearcommit(vp->v_mount);
	}
#ifndef NFS_V2_ONLY
out:
#endif
	if (pgs != spgs)
		kmem_free(pgs, sizeof(*pgs) * npages);
	return error;
}

/*
 * nfs_doio for B_PHYS.
 */
static int
nfs_doio_phys(struct buf *bp, struct uio *uiop)
{
	struct vnode *vp = bp->b_vp;
	int error;

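	/* bp->b_blkno is in DEV_BSIZE units; convert it to a byte offset. */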
	uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
	if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop);
	} else {
		int iomode = NFSV3WRITE_DATASYNC;
		bool stalewriteverf;
		struct nfsmount *nmp = VFSTONFS(vp->v_mount);

		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		rw_enter(&nmp->nm_writeverflock, RW_READER);
		error = nfs_writerpc(vp, uiop, &iomode, false, &stalewriteverf);
		rw_exit(&nmp->nm_writeverflock);
		if (stalewriteverf) {
			nfs_clearcommit(bp->b_vp->v_mount);
		}
	}
	bp->b_error = error;
	return error;
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(struct buf *bp)
{
	int error;
	struct uio uio;
	struct uio *uiop = &uio;
	struct iovec io;
	UVMHIST_FUNC("nfs_doio"); UVMHIST_CALLED(ubchist);

	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_offset = (((off_t)bp->b_blkno) << DEV_BSHIFT);
	UIO_SETUP_SYSSPACE(uiop);
	io.iov_base = bp->b_data;
	io.iov_len = uiop->uio_resid = bp->b_bcount;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		error = nfs_doio_phys(bp, uiop);
	} else if (bp->b_flags & B_READ) {
		error = nfs_doio_read(bp, uiop);
	} else {
		error = nfs_doio_write(bp, uiop);
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}

/*
 * Vnode op for VM getpages.
 */

int
nfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uobj;
	struct nfsnode *np = VTONFS(vp);
	const int npages = *ap->a_count;
	struct vm_page *pg, **pgs, **opgs, *spgs[UBC_MAX_PAGES];
	off_t origoffset, len;
	int i, error;
	bool v3 = NFS_ISV3(vp);
	bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	bool locked = (ap->a_flags & PGO_LOCKED) != 0;

	/*
	 * If we are not locked we are not really using opgs,
	 * so just initialize it
	 */
	if (!locked || npages < __arraycount(spgs))
		opgs = spgs;
	else {
		if ((opgs = kmem_alloc(npages * sizeof(*opgs), KM_NOSLEEP)) ==
		    NULL)
			return ENOMEM;
	}

	/*
	 * call the genfs code to get the pages.  `pgs' may be NULL
	 * when doing read-ahead.
	 */
	pgs = ap->a_m;
	if (write && locked && v3) {
		KASSERT(pgs != NULL);
#ifdef DEBUG

		/*
		 * If PGO_LOCKED is set, real pages shouldn't exist
		 * in the array.
		 */

		for (i = 0; i < npages; i++)
			KDASSERT(pgs[i] == NULL || pgs[i] == PGO_DONTCARE);
#endif
		memcpy(opgs, pgs, npages * sizeof(struct vm_page *));
	}
	error = genfs_getpages(v);
	if (error)
		goto out;

	/*
	 * for read faults where the nfs node is not yet marked NMODIFIED,
	 * set PG_RDONLY on the pages so that we come back here if someone
	 * tries to modify later via the mapping that will be entered for
	 * this fault.
	 */

	if (!write && (np->n_flag & NMODIFIED) == 0 && pgs != NULL) {
		if (!locked) {
			mutex_enter(uobj->vmobjlock);
		}
		for (i = 0; i < npages; i++) {
			pg = pgs[i];
			if (pg == NULL || pg == PGO_DONTCARE) {
				continue;
			}
			pg->flags |= PG_RDONLY;
		}
		if (!locked) {
			mutex_exit(uobj->vmobjlock);
		}
	}
	if (!write)
		goto out;

	/*
	 * this is a write fault, update the commit info.
	 */

	origoffset = ap->a_offset;
	len = npages << PAGE_SHIFT;

	if (v3) {
		if (!locked) {
			mutex_enter(&np->n_commitlock);
		} else {
			if (!mutex_tryenter(&np->n_commitlock)) {

				/*
				 * Since PGO_LOCKED is set, we need to unbusy
				 * all pages fetched by genfs_getpages() above,
				 * tell the caller that there are no pages
				 * available and put back the original pgs
				 * array.
				 */

				mutex_enter(&uvm_pageqlock);
				uvm_page_unbusy(pgs, npages);
				mutex_exit(&uvm_pageqlock);
				*ap->a_count = 0;
				memcpy(pgs, opgs,
				    npages * sizeof(struct vm_page *));
				error = EBUSY;
				goto out;
			}
		}
		nfs_del_committed_range(vp, origoffset, len);
		nfs_del_tobecommitted_range(vp, origoffset, len);
	}
	np->n_flag |= NMODIFIED;
	if (!locked) {
		mutex_enter(uobj->vmobjlock);
	}
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}
		pg->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
	}
	if (!locked) {
		mutex_exit(uobj->vmobjlock);
	}
	if (v3) {
		mutex_exit(&np->n_commitlock);
	}
out:
	if (opgs != spgs)
		kmem_free(opgs, sizeof(*opgs) * npages);
	return error;
}