/*	$NetBSD: layer_vnops.c,v 1.59 2016/08/20 12:37:09 hannken Exp $	*/

/*
 * Copyright (c) 1999 National Aeronautics & Space Administration
 * All rights reserved.
 *
 * This software was written by William Studenmund of the
 * Numerical Aerospace Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the National Aeronautics & Space Administration
 *    nor the names of its contributors may be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NATIONAL AERONAUTICS & SPACE ADMINISTRATION
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ADMINISTRATION OR CONTRIB-
 * UTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * John Heidemann of the UCLA Ficus project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_vnops.c	8.6 (Berkeley) 5/27/95
 *
 * Ancestors:
 *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
 *	Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp
 *	...and...
 *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
 */

/*
 * Generic layer vnode operations.
 *
 * The layer.h, layer_extern.h, layer_vfs.c, and layer_vnops.c files provide
 * the core implementation of stacked file-systems.
 *
 * The layerfs duplicates a portion of the file system name space under
 * a new name.  In this respect, it is similar to the loopback file system.
 * It differs from the loopback fs in two respects: it is implemented using
 * a stackable layers technique, and its "layerfs-nodes" stack above all
 * lower-layer vnodes, not just over directory vnodes.
 *
 * OPERATION OF LAYERFS
 *
 * The layerfs is the minimum file system layer, bypassing all possible
 * operations to the lower layer for processing there.  The majority of its
 * activity centers on the bypass routine, through which nearly all vnode
 * operations pass.
 *
 * The bypass routine accepts arbitrary vnode operations for handling by
 * the lower layer.  It begins by examining vnode operation arguments and
 * replacing any layered nodes by their lower-layer equivalents.  It then
 * invokes an operation on the lower layer.  Finally, it replaces the
 * layered nodes in the arguments and, if a vnode is returned by the
 * operation, stacks a layered node on top of the returned vnode.
 *
 * The bypass routine in this file, layer_bypass(), is suitable for use
 * by many different layered filesystems.  It can be used by multiple
 * filesystems simultaneously.  Alternatively, a layered fs may provide
 * its own bypass routine, in which case layer_bypass() should be used as
 * a model.  For instance, the main functionality provided by umapfs, the
 * user identity mapping file system, is handled by a custom bypass routine.
 *
 * Typically a layered fs registers its selected bypass routine as the
 * default vnode operation in its vnodeopv_entry_desc table.  Additionally,
 * the filesystem must store the bypass entry point in the layerm_bypass
 * field of struct layer_mount.  All other layer routines in this file will
 * use the layerm_bypass() routine.
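 *
 * As an illustration, here is a minimal sketch of such a table, loosely
 * modeled on nullfs; the example_* names are placeholders, not
 * definitions from this file:
 *
 *	int (**example_vnodeop_p)(void *);
 *	const struct vnodeopv_entry_desc example_vnodeop_entries[] = {
 *		{ &vop_default_desc, layer_bypass },
 *		{ &vop_lookup_desc, layer_lookup },
 *		{ &vop_getattr_desc, layer_getattr },
 *		{ NULL, NULL }
 *	};
 *	const struct vnodeopv_desc example_vnodeop_opv_desc =
 *		{ &example_vnodeop_p, example_vnodeop_entries };
 *
 * with the mount code storing the bypass entry point along the lines of
 * "lmp->layerm_bypass = layer_bypass;".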
 *
 * Although the bypass routine handles most operations outright, a number
 * of operations are special cased and handled by the layerfs.  For instance,
 * layer_getattr() must change the fsid being returned, while layer_lock()
 * and layer_unlock() must handle any locking for the current vnode as well
 * as pass the lock request down.  layer_inactive() and layer_reclaim() are
 * not bypassed so that they can handle freeing layerfs-specific data.  Also,
 * certain vnode operations (create, mknod, remove, link, rename, mkdir,
 * rmdir, and symlink) change the locking state within the operation.  Ideally
 * these operations should not change the lock state, but should be changed
 * to let the caller of the function unlock them.  Otherwise, all intermediate
 * vnode layers (such as union, umapfs, etc) must catch these functions to do
 * the necessary locking at their layer.
 *
 * INSTANTIATING VNODE STACKS
 *
 * Mounting associates the "layerfs-nodes" stack with a lower layer, in effect
 * stacking two VFSes.  The initial mount creates a single vnode stack for
 * the root of the new layerfs.  All other vnode stacks are created as a
 * result of vnode operations on this or other layerfs vnode stacks.
 *
 * New vnode stacks come into existence as a result of an operation which
 * returns a vnode.  The bypass routine stacks a layerfs-node above the new
 * vnode before returning it to the caller.
 *
 * For example, imagine mounting a null layer with:
 *
 *	"mount_null /usr/include /dev/layer/null"
 *
 * Changing directory to /dev/layer/null will assign the root layerfs-node
 * (which was created when the null layer was mounted).  Now consider opening
 * "sys".  A layer_lookup() would be performed on the root layerfs-node.
 * This operation would bypass through to the lower layer which would return
 * a vnode representing the UFS "sys".  Then, layer_bypass() builds a
 * layerfs-node aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the layerfs-node "sys" will repeat this process when
 * constructing other vnode stacks.
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer when the
 * operation cannot be completely bypassed.  Each method is appropriate in
 * different situations.  In both cases, it is the responsibility of the
 * aliasing layer to make the operation arguments "correct" for the lower
 * layer by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.  This
 * method is most suitable when you wish to invoke the operation currently
 * being handled on the lower layer.  It has the advantage that the bypass
 * routine already must do argument mapping.  An example of this is
 * layer_getattr().
 *
 * A second approach is to directly invoke vnode operations on the lower
 * layer with the VOP_OPERATIONNAME interface.  The advantage of this method
 * is that it is easy to invoke arbitrary operations on the lower layer.
 * The disadvantage is that the vnode arguments must be manually mapped.
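 *
 * As a sketch of the second approach (assuming vp, cred, flags, offlo and
 * offhi are already in scope), an aliasing layer could call fsync on the
 * lower layer with:
 *
 *	struct vnode *lvp = LAYERVPTOLOWERVP(vp);
 *	error = VOP_FSYNC(lvp, cred, flags, offlo, offhi);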
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.59 2016/08/20 12:37:09 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/kmem.h>
#include <sys/buf.h>
#include <sys/kauth.h>

#include <miscfs/genfs/layer.h>
#include <miscfs/genfs/layer_extern.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/*
 * This is the 08-June-99 bypass routine, based on the 10-Apr-92 bypass
 * routine by John Heidemann.
 * The new element for this version is that the whole nullfs
 * system gained the concept of locks on the lower node.
 * The 10-Apr-92 version was optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * New for the 08-June-99 version: we also handle operations which unlock
 * the passed-in node (typically they vput the node).
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
layer_bypass(void *v)
{
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		<other random data follows, presumably>
	} */ *ap = v;
	int (**our_vnodeop_p)(void *);
	struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS], *vp0;
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct mount *mp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i, flags;

#ifdef DIAGNOSTIC
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic("%s: no vp's in map.\n", __func__);
#endif

	vps_p[0] =
	    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
	vp0 = *vps_p[0];
	mp = vp0->v_mount;
	flags = MOUNTTOLAYERMOUNT(mp)->layerm_flags;
	our_vnodeop_p = vp0->v_op;

	if (flags & LAYERFS_MBYPASSDEBUG)
		printf("%s: %s\n", __func__, descp->vdesc_name);

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		vps_p[i] = this_vp_p =
		    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i],
		    ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (We must always map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULL ||
		    (*this_vp_p)->v_op != our_vnodeop_p)) {
			old_vps[i] = NULL;
		} else {
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = LAYERVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & VDESC_VP0_WILLRELE)
				vref(*this_vp_p);
		}
	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	error = VCALL(*vps_p[0], descp->vdesc_offset, ap);

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
			if (reles & VDESC_VP0_WILLRELE)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && !error) {
		vppp = VOPARG_OFFSETTO(struct vnode***,
		    descp->vdesc_vpp_offset, ap);
		/*
		 * Only vop_lookup, vop_create, vop_mkdir, vop_mknod
		 * and vop_symlink return vpp's.  vop_lookup doesn't call bypass
		 * as a lookup on "." would generate a locking error.
		 * So all the calls which get us here have an unlocked vpp. :-)
		 */
		error = layer_node_create(mp, **vppp, *vppp);
		if (error) {
			vrele(**vppp);
			**vppp = NULL;
		}
	}
	return error;
}

/*
 * We have to carry on the locking protocol on the layer vnodes
 * as we progress through the tree.  We also have to enforce read-only
 * if this layer is mounted read-only.
 */
int
layer_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp, *lvp, *ldvp;
	int error, flags = cnp->cn_flags;

	dvp = ap->a_dvp;

	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		*ap->a_vpp = NULL;
		return EROFS;
	}

	ldvp = LAYERVPTOLOWERVP(dvp);
	ap->a_dvp = ldvp;
	error = VCALL(ldvp, ap->a_desc->vdesc_offset, ap);
	lvp = *ap->a_vpp;
	*ap->a_vpp = NULL;

	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;

	/*
	 * We must do the same locking and unlocking at this layer as
	 * is done in the layers below us.
	 */
	if (ldvp == lvp) {
		/*
		 * Got the same object back, because we looked up ".",
		 * or ".." in the root node of a mount point.
		 * So we make another reference to dvp and return it.
		 */
		vref(dvp);
		*ap->a_vpp = dvp;
		vrele(lvp);
	} else if (lvp != NULL) {
		/* Note: dvp and ldvp are both locked. */
		error = layer_node_create(dvp->v_mount, lvp, ap->a_vpp);
		if (error) {
			vrele(lvp);
		}
	}
	return error;
}

/*
 * Setattr call.  Disallow write attempts if the layer is mounted read-only.
 */
int
layer_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;

	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY))
		return EROFS;
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return EISDIR;
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			return 0;
		case VREG:
		case VLNK:
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return EROFS;
		}
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * We handle getattr only to change the fsid.
 */
int
layer_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error;

	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error) {
		return error;
	}
	/* Requires that arguments be restored. */
	ap->a_vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
	return 0;
}

int
layer_access(void *v)
{
	struct vop_access_args /* {
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	mode_t mode = ap->a_mode;

	/*
	 * Disallow write attempts on read-only layers, unless the
	 * file is a socket, fifo, or a block or character device
	 * resident on the file system.
	 */
	if (mode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return EROFS;
			break;
		default:
			break;
		}
	}
	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * We must handle open to be able to catch MNT_NODEV and friends.
 */
int
layer_open(void *v)
{
	struct vop_open_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	enum vtype lower_type = LAYERVPTOLOWERVP(vp)->v_type;

	if (((lower_type == VBLK) || (lower_type == VCHR)) &&
	    (vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	return LAYERFS_DO_BYPASS(vp, ap);
}

/*
 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't bother
 * syncing the underlying vnodes, since they'll be fsync'ed when
 * reclaimed; otherwise, pass it through to the underlying layer.
 *
 * XXX Do we still need to worry about shallow fsync?
 */
int
layer_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct lwp *a_l;
	} */ *ap = v;
	int error;

	if (ap->a_flags & FSYNC_RECLAIM) {
		return 0;
	}
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}
	return LAYERFS_DO_BYPASS(ap->a_vp, ap);
}

int
layer_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	/*
	 * If we did a remove, don't cache the node.
	 */
	*ap->a_recycle = ((VTOLAYER(vp)->layer_flags & LAYERFS_REMOVED) != 0);

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our layer_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */
	VOP_UNLOCK(vp);
	return 0;
}

int
layer_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int error;

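	/*
	 * Take an extra reference so that the layer node stays valid
	 * across the bypass, letting us mark it LAYERFS_REMOVED once
	 * the lower-layer remove has succeeded.
	 */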
	vref(vp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0) {
		VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED;
	}
	vrele(vp);

	return error;
}

int
layer_rename(void *v)
{
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	struct vnode *fdvp = ap->a_fdvp, *tvp;
	int error;

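	/*
	 * If the rename target exists on our layer, take an extra
	 * reference across the bypass so that we can mark its layer
	 * node LAYERFS_REMOVED after the lower rename has replaced it.
	 */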
	tvp = ap->a_tvp;
	if (tvp) {
		if (tvp->v_mount != fdvp->v_mount)
			tvp = NULL;
		else
			vref(tvp);
	}
	error = LAYERFS_DO_BYPASS(fdvp, ap);
	if (tvp) {
		if (error == 0)
			VTOLAYER(tvp)->layer_flags |= LAYERFS_REMOVED;
		vrele(tvp);
	}
	return error;
}

int
layer_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct vnode *vp = ap->a_vp;

	vref(vp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	if (error == 0) {
		VTOLAYER(vp)->layer_flags |= LAYERFS_REMOVED;
	}
	vrele(vp);

	return error;
}

int
layer_revoke(void *v)
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = LAYERVPTOLOWERVP(vp);
	int error;

	/*
	 * We will most likely end up in vclean which uses the v_usecount
	 * to determine if a vnode is active.  Take an extra reference on
	 * the lower vnode so it will always close and inactivate.
	 */
	vref(lvp);
	error = LAYERFS_DO_BYPASS(vp, ap);
	vrele(lvp);

	return error;
}

int
layer_reclaim(void *v)
{
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
		struct lwp *a_l;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct layer_mount *lmp = MOUNTTOLAYERMOUNT(vp->v_mount);
	struct layer_node *xp = VTOLAYER(vp);
	struct vnode *lowervp = xp->layer_lowervp;

	/*
	 * Note: in vop_reclaim, the node's struct lock has been
	 * decommissioned, so we have to be careful about calling
	 * VOP's on ourself.  We must be careful as VXLOCK is set.
	 */
	if (vp == lmp->layerm_rootvp) {
		/*
		 * Oops! We no longer have a root node.  The most likely
		 * reason is that someone forcibly unmounted the underlying fs.
		 *
		 * Now getting the root vnode will fail.  We're dead. :-(
		 */
		lmp->layerm_rootvp = NULL;
	}
	/* After this assignment, this node will not be re-used. */
	xp->layer_lowervp = NULL;
	kmem_free(vp->v_data, lmp->layerm_size);
	vp->v_data = NULL;
	vrele(lowervp);

	return 0;
}

int
layer_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *lowervp = LAYERVPTOLOWERVP(vp);
	int flags = ap->a_flags;
	int error;

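	/*
	 * For LK_NOWAIT requests, try the lower lock and then check,
	 * without sleeping, that this vnode has not been reclaimed;
	 * back the lower lock out if either step fails.
	 */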
	if (ISSET(flags, LK_NOWAIT)) {
		error = VOP_LOCK(lowervp, flags);
		if (error)
			return error;
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			VOP_UNLOCK(lowervp);
		return error;
	}

	error = VOP_LOCK(lowervp, flags);
	if (error)
		return error;

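	/*
	 * With the lower vnode locked, check whether this vnode has
	 * started dying.  If so, drop the lower lock and wait for the
	 * death to complete, returning ENOENT.
	 */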
	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		VOP_UNLOCK(lowervp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);

	return error;
}

/*
 * We just feed the returned vnode up to the caller - there's no need
 * to build a layer node on top of the node on which we're going to do
 * i/o. :-)
 */
int
layer_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;
	struct vnode *vp;

	vp = LAYERVPTOLOWERVP(ap->a_vp);
	ap->a_vp = vp;

	return VCALL(vp, ap->a_desc->vdesc_offset, ap);
}

int
layer_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	printf("\ttag VT_LAYERFS, vp=%p, lowervp=%p\n", vp, LAYERVPTOLOWERVP(vp));
	return 0;
}

int
layer_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

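	/*
	 * PGO_LOCKED requests must be satisfied without sleeping, so we
	 * cannot hand them to the lower layer; fail with EBUSY and let
	 * the caller retry with a blocking request.
	 */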
	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = LAYERVPTOLOWERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}

int
layer_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = LAYERVPTOLOWERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

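	/*
	 * On reclaim of the layer vnode there is nothing to flush here:
	 * the pages belong to the lower vnode and will be written back
	 * when it is cleaned up.
	 */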
	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}