1 | /* $NetBSD: uvm_vnode.c,v 1.102 2015/12/06 09:38:54 wiz Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1997 Charles D. Cranor and Washington University. |
5 | * Copyright (c) 1991, 1993 |
6 | * The Regents of the University of California. |
7 | * Copyright (c) 1990 University of Utah. |
8 | * |
9 | * All rights reserved. |
10 | * |
11 | * This code is derived from software contributed to Berkeley by |
12 | * the Systems Programming Group of the University of Utah Computer |
13 | * Science Department. |
14 | * |
15 | * Redistribution and use in source and binary forms, with or without |
16 | * modification, are permitted provided that the following conditions |
17 | * are met: |
18 | * 1. Redistributions of source code must retain the above copyright |
19 | * notice, this list of conditions and the following disclaimer. |
20 | * 2. Redistributions in binary form must reproduce the above copyright |
21 | * notice, this list of conditions and the following disclaimer in the |
22 | * documentation and/or other materials provided with the distribution. |
23 | * 3. Neither the name of the University nor the names of its contributors |
24 | * may be used to endorse or promote products derived from this software |
25 | * without specific prior written permission. |
26 | * |
27 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
28 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
32 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
33 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
36 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
37 | * SUCH DAMAGE. |
38 | * |
39 | * @(#)vnode_pager.c 8.8 (Berkeley) 2/13/94 |
40 | * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp |
41 | */ |
42 | |
43 | /* |
44 | * uvm_vnode.c: the vnode pager. |
45 | */ |
46 | |
47 | #include <sys/cdefs.h> |
48 | __KERNEL_RCSID(0, "$NetBSD: uvm_vnode.c,v 1.102 2015/12/06 09:38:54 wiz Exp $" ); |
49 | |
50 | #ifdef _KERNEL_OPT |
51 | #include "opt_uvmhist.h" |
52 | #endif |
53 | |
54 | #include <sys/param.h> |
55 | #include <sys/systm.h> |
56 | #include <sys/kernel.h> |
57 | #include <sys/vnode.h> |
58 | #include <sys/disklabel.h> |
59 | #include <sys/ioctl.h> |
60 | #include <sys/fcntl.h> |
61 | #include <sys/conf.h> |
62 | #include <sys/pool.h> |
63 | #include <sys/mount.h> |
64 | |
65 | #include <miscfs/specfs/specdev.h> |
66 | |
67 | #include <uvm/uvm.h> |
68 | #include <uvm/uvm_readahead.h> |
69 | |
/* Instantiate the "ubchist" history that the UVMHIST_* calls in this file log to. */
#if defined(UVMHIST)
UVMHIST_DEFINE(ubchist);
#endif /* UVMHIST */
73 | |
74 | /* |
75 | * functions |
76 | */ |
77 | |
78 | static void uvn_detach(struct uvm_object *); |
79 | static int uvn_get(struct uvm_object *, voff_t, struct vm_page **, int *, |
80 | int, vm_prot_t, int, int); |
81 | static int uvn_put(struct uvm_object *, voff_t, voff_t, int); |
82 | static void uvn_reference(struct uvm_object *); |
83 | |
84 | static int uvn_findpage(struct uvm_object *, voff_t, struct vm_page **, |
85 | int); |
86 | |
87 | /* |
88 | * master pager structure |
89 | */ |
90 | |
91 | const struct uvm_pagerops uvm_vnodeops = { |
92 | .pgo_reference = uvn_reference, |
93 | .pgo_detach = uvn_detach, |
94 | .pgo_get = uvn_get, |
95 | .pgo_put = uvn_put, |
96 | }; |
97 | |
98 | /* |
99 | * the ops! |
100 | */ |
101 | |
/*
 * uvn_reference
 *
 * add one more reference to a VM object by calling vref() on it as a
 * vnode.  The caller passes in an existing reference, so the count is
 * already at least one and the uvn cannot be killed or locked out
 * while we are here.
 *
 * => caller must call with object unlocked.
 * => caller must be using the same accessprot as was used at attach time
 */

static void
uvn_reference(struct uvm_object *uobj)
{
	struct vnode *vp = (struct vnode *)uobj;

	vref(vp);
}
118 | |
119 | |
/*
 * uvn_detach
 *
 * drop one reference to a VM object by calling vrele() on it as a vnode.
 *
 * => caller must call with object unlocked and map locked.
 */

static void
uvn_detach(struct uvm_object *uobj)
{
	struct vnode *vp = (struct vnode *)uobj;

	vrele(vp);
}
133 | |
134 | /* |
135 | * uvn_put: flush page data to backing store. |
136 | * |
137 | * => object must be locked on entry! VOP_PUTPAGES must unlock it. |
138 | * => flags: PGO_SYNCIO -- use sync. I/O |
139 | * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed) |
140 | */ |
141 | |
142 | static int |
143 | uvn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags) |
144 | { |
145 | struct vnode *vp = (struct vnode *)uobj; |
146 | int error; |
147 | |
148 | KASSERT(mutex_owned(vp->v_interlock)); |
149 | error = VOP_PUTPAGES(vp, offlo, offhi, flags); |
150 | |
151 | return error; |
152 | } |
153 | |
154 | |
155 | /* |
156 | * uvn_get: get pages (synchronously) from backing store |
157 | * |
158 | * => prefer map unlocked (not required) |
159 | * => object must be locked! we will _unlock_ it before starting any I/O. |
160 | * => flags: PGO_ALLPAGES: get all of the pages |
161 | * PGO_LOCKED: fault data structures are locked |
162 | * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] |
163 | * => NOTE: caller must check for released pages!! |
164 | */ |
165 | |
166 | static int |
167 | uvn_get(struct uvm_object *uobj, voff_t offset, |
168 | struct vm_page **pps /* IN/OUT */, |
169 | int *npagesp /* IN (OUT if PGO_LOCKED)*/, |
170 | int centeridx, vm_prot_t access_type, int advice, int flags) |
171 | { |
172 | struct vnode *vp = (struct vnode *)uobj; |
173 | int error; |
174 | |
175 | UVMHIST_FUNC("uvn_get" ); UVMHIST_CALLED(ubchist); |
176 | |
177 | UVMHIST_LOG(ubchist, "vp %p off 0x%x" , vp, (int)offset, 0,0); |
178 | |
179 | if (vp->v_type == VREG && (access_type & VM_PROT_WRITE) == 0 |
180 | && (flags & PGO_LOCKED) == 0) { |
181 | vn_ra_allocctx(vp); |
182 | uvm_ra_request(vp->v_ractx, advice, uobj, offset, |
183 | *npagesp << PAGE_SHIFT); |
184 | } |
185 | |
186 | error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx, |
187 | access_type, advice, flags); |
188 | |
189 | KASSERT(((flags & PGO_LOCKED) != 0 && mutex_owned(vp->v_interlock)) || |
190 | (flags & PGO_LOCKED) == 0); |
191 | return error; |
192 | } |
193 | |
194 | |
195 | /* |
196 | * uvn_findpages: |
197 | * return the page for the uobj and offset requested, allocating if needed. |
198 | * => uobj must be locked. |
199 | * => returned pages will be BUSY. |
200 | */ |
201 | |
202 | int |
203 | uvn_findpages(struct uvm_object *uobj, voff_t offset, int *npagesp, |
204 | struct vm_page **pgs, int flags) |
205 | { |
206 | int i, count, found, npages, rv; |
207 | |
208 | count = found = 0; |
209 | npages = *npagesp; |
210 | if (flags & UFP_BACKWARD) { |
211 | for (i = npages - 1; i >= 0; i--, offset -= PAGE_SIZE) { |
212 | rv = uvn_findpage(uobj, offset, &pgs[i], flags); |
213 | if (rv == 0) { |
214 | if (flags & UFP_DIRTYONLY) |
215 | break; |
216 | } else |
217 | found++; |
218 | count++; |
219 | } |
220 | } else { |
221 | for (i = 0; i < npages; i++, offset += PAGE_SIZE) { |
222 | rv = uvn_findpage(uobj, offset, &pgs[i], flags); |
223 | if (rv == 0) { |
224 | if (flags & UFP_DIRTYONLY) |
225 | break; |
226 | } else |
227 | found++; |
228 | count++; |
229 | } |
230 | } |
231 | *npagesp = count; |
232 | return (found); |
233 | } |
234 | |
235 | static int |
236 | uvn_findpage(struct uvm_object *uobj, voff_t offset, struct vm_page **pgp, |
237 | int flags) |
238 | { |
239 | struct vm_page *pg; |
240 | bool dirty; |
241 | UVMHIST_FUNC("uvn_findpage" ); UVMHIST_CALLED(ubchist); |
242 | UVMHIST_LOG(ubchist, "vp %p off 0x%lx" , uobj, offset,0,0); |
243 | |
244 | KASSERT(mutex_owned(uobj->vmobjlock)); |
245 | |
246 | if (*pgp != NULL) { |
247 | UVMHIST_LOG(ubchist, "dontcare" , 0,0,0,0); |
248 | return 0; |
249 | } |
250 | for (;;) { |
251 | /* look for an existing page */ |
252 | pg = uvm_pagelookup(uobj, offset); |
253 | |
254 | /* nope? allocate one now */ |
255 | if (pg == NULL) { |
256 | if (flags & UFP_NOALLOC) { |
257 | UVMHIST_LOG(ubchist, "noalloc" , 0,0,0,0); |
258 | return 0; |
259 | } |
260 | pg = uvm_pagealloc(uobj, offset, NULL, |
261 | UVM_FLAG_COLORMATCH); |
262 | if (pg == NULL) { |
263 | if (flags & UFP_NOWAIT) { |
264 | UVMHIST_LOG(ubchist, "nowait" ,0,0,0,0); |
265 | return 0; |
266 | } |
267 | mutex_exit(uobj->vmobjlock); |
268 | uvm_wait("uvn_fp1" ); |
269 | mutex_enter(uobj->vmobjlock); |
270 | continue; |
271 | } |
272 | UVMHIST_LOG(ubchist, "alloced %p (color %u)" , pg, |
273 | VM_PGCOLOR_BUCKET(pg), 0,0); |
274 | break; |
275 | } else if (flags & UFP_NOCACHE) { |
276 | UVMHIST_LOG(ubchist, "nocache" ,0,0,0,0); |
277 | return 0; |
278 | } |
279 | |
280 | /* page is there, see if we need to wait on it */ |
281 | if ((pg->flags & PG_BUSY) != 0) { |
282 | if (flags & UFP_NOWAIT) { |
283 | UVMHIST_LOG(ubchist, "nowait" ,0,0,0,0); |
284 | return 0; |
285 | } |
286 | pg->flags |= PG_WANTED; |
287 | UVMHIST_LOG(ubchist, "wait %p (color %u)" , pg, |
288 | VM_PGCOLOR_BUCKET(pg), 0,0); |
289 | UVM_UNLOCK_AND_WAIT(pg, uobj->vmobjlock, 0, |
290 | "uvn_fp2" , 0); |
291 | mutex_enter(uobj->vmobjlock); |
292 | continue; |
293 | } |
294 | |
295 | /* skip PG_RDONLY pages if requested */ |
296 | if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) { |
297 | UVMHIST_LOG(ubchist, "nordonly" ,0,0,0,0); |
298 | return 0; |
299 | } |
300 | |
301 | /* stop on clean pages if requested */ |
302 | if (flags & UFP_DIRTYONLY) { |
303 | dirty = pmap_clear_modify(pg) || |
304 | (pg->flags & PG_CLEAN) == 0; |
305 | pg->flags |= PG_CLEAN; |
306 | if (!dirty) { |
307 | UVMHIST_LOG(ubchist, "dirtonly" , 0,0,0,0); |
308 | return 0; |
309 | } |
310 | } |
311 | |
312 | /* mark the page BUSY and we're done. */ |
313 | pg->flags |= PG_BUSY; |
314 | UVM_PAGE_OWN(pg, "uvn_findpage" ); |
315 | UVMHIST_LOG(ubchist, "found %p (color %u)" , |
316 | pg, VM_PGCOLOR_BUCKET(pg), 0,0); |
317 | break; |
318 | } |
319 | *pgp = pg; |
320 | return 1; |
321 | } |
322 | |
323 | /* |
324 | * uvm_vnp_setsize: grow or shrink a vnode uobj |
325 | * |
326 | * grow => just update size value |
327 | * shrink => toss un-needed pages |
328 | * |
329 | * => we assume that the caller has a reference of some sort to the |
330 | * vnode in question so that it will not be yanked out from under |
331 | * us. |
332 | */ |
333 | |
334 | void |
335 | uvm_vnp_setsize(struct vnode *vp, voff_t newsize) |
336 | { |
337 | struct uvm_object *uobj = &vp->v_uobj; |
338 | voff_t pgend = round_page(newsize); |
339 | voff_t oldsize; |
340 | UVMHIST_FUNC("uvm_vnp_setsize" ); UVMHIST_CALLED(ubchist); |
341 | |
342 | mutex_enter(uobj->vmobjlock); |
343 | UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x" , |
344 | vp, vp->v_size, newsize, 0); |
345 | |
346 | /* |
347 | * now check if the size has changed: if we shrink we had better |
348 | * toss some pages... |
349 | */ |
350 | |
351 | KASSERT(newsize != VSIZENOTSET && newsize >= 0); |
352 | KASSERT(vp->v_size <= vp->v_writesize); |
353 | KASSERT(vp->v_size == vp->v_writesize || |
354 | newsize == vp->v_writesize || newsize <= vp->v_size); |
355 | |
356 | oldsize = vp->v_writesize; |
357 | |
358 | /* |
359 | * check whether size shrinks |
360 | * if old size hasn't been set, there are no pages to drop |
361 | * if there was an integer overflow in pgend, then this is no shrink |
362 | */ |
363 | if (oldsize > pgend && oldsize != VSIZENOTSET && pgend >= 0) { |
364 | (void) uvn_put(uobj, pgend, 0, PGO_FREE | PGO_SYNCIO); |
365 | mutex_enter(uobj->vmobjlock); |
366 | } |
367 | vp->v_size = vp->v_writesize = newsize; |
368 | mutex_exit(uobj->vmobjlock); |
369 | } |
370 | |
371 | void |
372 | uvm_vnp_setwritesize(struct vnode *vp, voff_t newsize) |
373 | { |
374 | |
375 | mutex_enter(vp->v_interlock); |
376 | KASSERT(newsize != VSIZENOTSET && newsize >= 0); |
377 | KASSERT(vp->v_size != VSIZENOTSET); |
378 | KASSERT(vp->v_writesize != VSIZENOTSET); |
379 | KASSERT(vp->v_size <= vp->v_writesize); |
380 | KASSERT(vp->v_size <= newsize); |
381 | vp->v_writesize = newsize; |
382 | mutex_exit(vp->v_interlock); |
383 | } |
384 | |
385 | bool |
386 | uvn_text_p(struct uvm_object *uobj) |
387 | { |
388 | struct vnode *vp = (struct vnode *)uobj; |
389 | |
390 | return (vp->v_iflag & VI_EXECMAP) != 0; |
391 | } |
392 | |
393 | bool |
394 | uvn_clean_p(struct uvm_object *uobj) |
395 | { |
396 | struct vnode *vp = (struct vnode *)uobj; |
397 | |
398 | return (vp->v_iflag & VI_ONWORKLST) == 0; |
399 | } |
400 | |
401 | bool |
402 | uvn_needs_writefault_p(struct uvm_object *uobj) |
403 | { |
404 | struct vnode *vp = (struct vnode *)uobj; |
405 | |
406 | return uvn_clean_p(uobj) || |
407 | (vp->v_iflag & (VI_WRMAP|VI_WRMAPDIRTY)) == VI_WRMAP; |
408 | } |
409 | |