1 | /* $NetBSD: uvm_bio.c,v 1.83 2015/05/27 19:43:40 rmind Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1998 Chuck Silvers. |
5 | * All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * 3. The name of the author may not be used to endorse or promote products |
16 | * derived from this software without specific prior written permission. |
17 | * |
18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
19 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
20 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
21 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
22 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
23 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
25 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
26 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 | * SUCH DAMAGE. |
29 | * |
30 | */ |
31 | |
32 | /* |
33 | * uvm_bio.c: buffered i/o object mapping cache |
34 | */ |
35 | |
36 | #include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.83 2015/05/27 19:43:40 rmind Exp $");
38 | |
39 | #include "opt_uvmhist.h" |
40 | #include "opt_ubc.h" |
41 | |
42 | #include <sys/param.h> |
43 | #include <sys/systm.h> |
44 | #include <sys/kmem.h> |
45 | #include <sys/kernel.h> |
46 | #include <sys/proc.h> |
47 | #include <sys/vnode.h> |
48 | |
49 | #include <uvm/uvm.h> |
50 | |
51 | /* |
52 | * global data structures |
53 | */ |
54 | |
55 | /* |
56 | * local functions |
57 | */ |
58 | |
59 | static int ubc_fault(struct uvm_faultinfo *, vaddr_t, struct vm_page **, |
60 | int, int, vm_prot_t, int); |
61 | static struct ubc_map *ubc_find_mapping(struct uvm_object *, voff_t); |
62 | |
63 | /* |
 * local data structures
65 | */ |
66 | |
67 | #define UBC_HASH(uobj, offset) \ |
68 | (((((u_long)(uobj)) >> 8) + (((u_long)(offset)) >> PAGE_SHIFT)) & \ |
69 | ubc_object.hashmask) |
70 | |
71 | #define UBC_QUEUE(offset) \ |
72 | (&ubc_object.inactive[(((u_long)(offset)) >> ubc_winshift) & \ |
73 | (UBC_NQUEUES - 1)]) |
74 | |
75 | #define UBC_UMAP_ADDR(u) \ |
76 | (vaddr_t)(ubc_object.kva + (((u) - ubc_object.umap) << ubc_winshift)) |
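
/*
 * Worked example (illustrative only, with assumed values): with
 * PAGE_SHIFT = 12 and ubc_winshift = 13 (8KB windows), an access at
 * file offset 0x5234 lands in the window at offset 0x4000:
 *
 *	umap_offset = 0x5234 & ~(0x2000 - 1) = 0x4000
 *	slot_offset = 0x5234 &  (0x2000 - 1) = 0x1234
 *
 * UBC_HASH() mixes the object pointer and page number to choose a hash
 * chain for that (uobj, offset) pair, and UBC_UMAP_ADDR() recovers a
 * window's kernel VA from its index in the umap array.
 */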
77 | |
78 | |
79 | #define UMAP_PAGES_LOCKED 0x0001 |
80 | #define UMAP_MAPPING_CACHED 0x0002 |
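
/*
 * A ubc_map may be linked in three ways at once: on the global hash
 * (for ubc_find_mapping() lookups), on an inactive queue whenever its
 * refcount drops to zero (so ubc_alloc() can recycle it), and on its
 * object's uo_ubc list (so ubc_purge() can find it when the object is
 * destroyed).
 */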
81 | |
82 | struct ubc_map { |
83 | struct uvm_object * uobj; /* mapped object */ |
84 | voff_t offset; /* offset into uobj */ |
85 | voff_t writeoff; /* write offset */ |
86 | vsize_t writelen; /* write len */ |
87 | int refcount; /* refcount on mapping */ |
88 | int flags; /* extra state */ |
89 | int advice; |
90 | |
91 | LIST_ENTRY(ubc_map) hash; /* hash table */ |
92 | TAILQ_ENTRY(ubc_map) inactive; /* inactive queue */ |
93 | LIST_ENTRY(ubc_map) list; /* per-object list */ |
94 | }; |
95 | |
96 | TAILQ_HEAD(ubc_inactive_head, ubc_map); |
97 | static struct ubc_object { |
98 | struct uvm_object uobj; /* glue for uvm_map() */ |
99 | char *kva; /* where ubc_object is mapped */ |
100 | struct ubc_map *umap; /* array of ubc_map's */ |
101 | |
102 | LIST_HEAD(, ubc_map) *hash; /* hashtable for cached ubc_map's */ |
103 | u_long hashmask; /* mask for hashtable */ |
104 | |
105 | struct ubc_inactive_head *inactive; |
106 | /* inactive queues for ubc_map's */ |
107 | } ubc_object; |
108 | |
const struct uvm_pagerops ubc_pager = {
110 | .pgo_fault = ubc_fault, |
111 | /* ... rest are NULL */ |
112 | }; |
113 | |
114 | int ubc_nwins = UBC_NWINS; |
115 | int ubc_winshift = UBC_WINSHIFT; |
116 | int ubc_winsize; |
117 | #if defined(PMAP_PREFER) |
118 | int ubc_nqueues; |
119 | #define UBC_NQUEUES ubc_nqueues |
120 | #else |
121 | #define UBC_NQUEUES 1 |
122 | #endif |
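
/*
 * With PMAP_PREFER, windows are spread across several inactive queues,
 * one per virtual-cache "color": UBC_QUEUE() selects a queue from the
 * file offset, so a recycled window keeps a kernel VA whose alignment
 * matches the pmap's preference for that offset (see the PMAP_PREFER
 * computation in ubc_init() below).
 */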
123 | |
124 | #if defined(UBC_STATS) |
125 | |
126 | #define UBC_EVCNT_DEFINE(name) \ |
127 | struct evcnt ubc_evcnt_##name = \ |
128 | EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "ubc", #name); \ |
129 | EVCNT_ATTACH_STATIC(ubc_evcnt_##name); |
130 | #define UBC_EVCNT_INCR(name) ubc_evcnt_##name.ev_count++ |
131 | |
132 | #else /* defined(UBC_STATS) */ |
133 | |
134 | #define UBC_EVCNT_DEFINE(name) /* nothing */ |
135 | #define UBC_EVCNT_INCR(name) /* nothing */ |
136 | |
137 | #endif /* defined(UBC_STATS) */ |
138 | |
139 | UBC_EVCNT_DEFINE(wincachehit) |
140 | UBC_EVCNT_DEFINE(wincachemiss) |
141 | UBC_EVCNT_DEFINE(faultbusy) |
142 | |
143 | /* |
144 | * ubc_init |
145 | * |
146 | * init pager private data structures. |
147 | */ |
148 | |
149 | void |
150 | ubc_init(void) |
151 | { |
152 | struct ubc_map *umap; |
153 | vaddr_t va; |
154 | int i; |
155 | |
156 | /* |
157 | * Make sure ubc_winshift is sane. |
158 | */ |
159 | if (ubc_winshift < PAGE_SHIFT) |
160 | ubc_winshift = PAGE_SHIFT; |
161 | |
162 | /* |
163 | * init ubc_object. |
164 | * alloc and init ubc_map's. |
165 | * init inactive queues. |
166 | * alloc and init hashtable. |
167 | * map in ubc_object. |
168 | */ |
169 | |
170 | uvm_obj_init(&ubc_object.uobj, &ubc_pager, true, UVM_OBJ_KERN); |
171 | |
172 | ubc_object.umap = kmem_zalloc(ubc_nwins * sizeof(struct ubc_map), |
173 | KM_SLEEP); |
174 | if (ubc_object.umap == NULL) |
175 | panic("ubc_init: failed to allocate ubc_map" ); |
176 | |
180 | va = (vaddr_t)1L; |
181 | #ifdef PMAP_PREFER |
182 | PMAP_PREFER(0, &va, 0, 0); /* kernel is never topdown */ |
183 | ubc_nqueues = va >> ubc_winshift; |
184 | if (ubc_nqueues == 0) { |
185 | ubc_nqueues = 1; |
186 | } |
187 | #endif |
188 | ubc_winsize = 1 << ubc_winshift; |
189 | ubc_object.inactive = kmem_alloc(UBC_NQUEUES * |
190 | sizeof(struct ubc_inactive_head), KM_SLEEP); |
191 | if (ubc_object.inactive == NULL) |
192 | panic("ubc_init: failed to allocate inactive queue heads" ); |
193 | for (i = 0; i < UBC_NQUEUES; i++) { |
194 | TAILQ_INIT(&ubc_object.inactive[i]); |
195 | } |
196 | for (i = 0; i < ubc_nwins; i++) { |
197 | umap = &ubc_object.umap[i]; |
198 | TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)], |
199 | umap, inactive); |
200 | } |
201 | |
202 | ubc_object.hash = hashinit(ubc_nwins, HASH_LIST, true, |
203 | &ubc_object.hashmask); |
204 | for (i = 0; i <= ubc_object.hashmask; i++) { |
205 | LIST_INIT(&ubc_object.hash[i]); |
206 | } |
207 | |
208 | if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva, |
209 | ubc_nwins << ubc_winshift, &ubc_object.uobj, 0, (vsize_t)va, |
210 | UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, |
211 | UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) { |
212 | panic("ubc_init: failed to map ubc_object" ); |
213 | } |
214 | } |
215 | |
216 | void |
217 | ubchist_init(void) |
218 | { |
219 | |
220 | UVMHIST_INIT(ubchist, 300); |
221 | } |
222 | |
223 | /* |
224 | * ubc_fault_page: helper of ubc_fault to handle a single page. |
225 | * |
226 | * => Caller has UVM object locked. |
227 | * => Caller will perform pmap_update(). |
228 | */ |
229 | |
230 | static inline int |
231 | ubc_fault_page(const struct uvm_faultinfo *ufi, const struct ubc_map *umap, |
232 | struct vm_page *pg, vm_prot_t prot, vm_prot_t access_type, vaddr_t va) |
233 | { |
234 | struct uvm_object *uobj; |
235 | vm_prot_t mask; |
236 | int error; |
237 | bool rdonly; |
238 | |
239 | uobj = pg->uobject; |
240 | KASSERT(mutex_owned(uobj->vmobjlock)); |
241 | |
242 | if (pg->flags & PG_WANTED) { |
243 | wakeup(pg); |
244 | } |
245 | KASSERT((pg->flags & PG_FAKE) == 0); |
246 | if (pg->flags & PG_RELEASED) { |
247 | mutex_enter(&uvm_pageqlock); |
248 | uvm_pagefree(pg); |
249 | mutex_exit(&uvm_pageqlock); |
250 | return 0; |
251 | } |
252 | if (pg->loan_count != 0) { |
253 | |
254 | /* |
255 | * Avoid unneeded loan break, if possible. |
256 | */ |
257 | |
258 | if ((access_type & VM_PROT_WRITE) == 0) { |
259 | prot &= ~VM_PROT_WRITE; |
260 | } |
261 | if (prot & VM_PROT_WRITE) { |
262 | struct vm_page *newpg; |
263 | |
264 | newpg = uvm_loanbreak(pg); |
265 | if (newpg == NULL) { |
266 | uvm_page_unbusy(&pg, 1); |
267 | return ENOMEM; |
268 | } |
269 | pg = newpg; |
270 | } |
271 | } |
272 | |
273 | /* |
274 | * Note that a page whose backing store is partially allocated |
275 | * is marked as PG_RDONLY. |
276 | */ |
277 | |
278 | KASSERT((pg->flags & PG_RDONLY) == 0 || |
279 | (access_type & VM_PROT_WRITE) == 0 || |
280 | pg->offset < umap->writeoff || |
281 | pg->offset + PAGE_SIZE > umap->writeoff + umap->writelen); |
282 | |
283 | rdonly = ((access_type & VM_PROT_WRITE) == 0 && |
284 | (pg->flags & PG_RDONLY) != 0) || |
285 | UVM_OBJ_NEEDS_WRITEFAULT(uobj); |
286 | mask = rdonly ? ~VM_PROT_WRITE : VM_PROT_ALL; |
287 | |
288 | error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg), |
289 | prot & mask, PMAP_CANFAIL | (access_type & mask)); |
290 | |
291 | mutex_enter(&uvm_pageqlock); |
292 | uvm_pageactivate(pg); |
293 | mutex_exit(&uvm_pageqlock); |
294 | pg->flags &= ~(PG_BUSY|PG_WANTED); |
295 | UVM_PAGE_OWN(pg, NULL); |
296 | |
297 | return error; |
298 | } |
299 | |
300 | /* |
301 | * ubc_fault: fault routine for ubc mapping |
302 | */ |
303 | |
304 | static int |
305 | ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2, |
306 | int ign3, int ign4, vm_prot_t access_type, int flags) |
307 | { |
308 | struct uvm_object *uobj; |
309 | struct ubc_map *umap; |
310 | vaddr_t va, eva, ubc_offset, slot_offset; |
311 | struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT]; |
312 | int i, error, npages; |
313 | vm_prot_t prot; |
314 | |
315 | UVMHIST_FUNC("ubc_fault" ); UVMHIST_CALLED(ubchist); |
316 | |
317 | /* |
318 | * no need to try with PGO_LOCKED... |
319 | * we don't need to have the map locked since we know that |
320 | * no one will mess with it until our reference is released. |
321 | */ |
322 | |
323 | if (flags & PGO_LOCKED) { |
324 | uvmfault_unlockall(ufi, NULL, &ubc_object.uobj); |
325 | flags &= ~PGO_LOCKED; |
326 | } |
327 | |
328 | va = ufi->orig_rvaddr; |
329 | ubc_offset = va - (vaddr_t)ubc_object.kva; |
330 | umap = &ubc_object.umap[ubc_offset >> ubc_winshift]; |
331 | KASSERT(umap->refcount != 0); |
332 | KASSERT((umap->flags & UMAP_PAGES_LOCKED) == 0); |
333 | slot_offset = ubc_offset & (ubc_winsize - 1); |
334 | |
335 | /* |
	 * some platforms cannot write to individual bytes atomically, so
	 * software has to do read/modify/write of larger quantities instead
	 * (early Alpha CPUs, for example, had no byte store instructions).
	 * this means that the access_type for "write" operations
	 * can be VM_PROT_READ, which confuses us mightily.
340 | * |
341 | * deal with this by resetting access_type based on the info |
342 | * that ubc_alloc() stores for us. |
343 | */ |
344 | |
345 | access_type = umap->writelen ? VM_PROT_WRITE : VM_PROT_READ; |
346 | UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx access_type %d" , |
347 | va, ubc_offset, access_type, 0); |
348 | |
349 | #ifdef DIAGNOSTIC |
350 | if ((access_type & VM_PROT_WRITE) != 0) { |
351 | if (slot_offset < trunc_page(umap->writeoff) || |
352 | umap->writeoff + umap->writelen <= slot_offset) { |
353 | panic("ubc_fault: out of range write" ); |
354 | } |
355 | } |
356 | #endif |
357 | |
358 | /* no umap locking needed since we have a ref on the umap */ |
359 | uobj = umap->uobj; |
360 | |
361 | if ((access_type & VM_PROT_WRITE) == 0) { |
362 | npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT; |
363 | } else { |
364 | npages = (round_page(umap->offset + umap->writeoff + |
365 | umap->writelen) - (umap->offset + slot_offset)) |
366 | >> PAGE_SHIFT; |
367 | flags |= PGO_PASTEOF; |
368 | } |
369 | |
370 | again: |
371 | memset(pgs, 0, sizeof (pgs)); |
372 | mutex_enter(uobj->vmobjlock); |
373 | |
374 | UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x " , |
375 | slot_offset, umap->writeoff, umap->writelen, 0); |
376 | UVMHIST_LOG(ubchist, "getpages uobj %p offset 0x%x npages %d" , |
377 | uobj, umap->offset + slot_offset, npages, 0); |
378 | |
379 | error = (*uobj->pgops->pgo_get)(uobj, umap->offset + slot_offset, pgs, |
380 | &npages, 0, access_type, umap->advice, flags | PGO_NOBLOCKALLOC | |
381 | PGO_NOTIMESTAMP); |
382 | UVMHIST_LOG(ubchist, "getpages error %d npages %d" , error, npages, 0, |
383 | 0); |
384 | |
385 | if (error == EAGAIN) { |
386 | kpause("ubc_fault" , false, hz >> 2, NULL); |
387 | goto again; |
388 | } |
389 | if (error) { |
390 | return error; |
391 | } |
392 | |
393 | /* |
394 | * For virtually-indexed, virtually-tagged caches we should avoid |
395 | * creating writable mappings when we do not absolutely need them, |
396 | * since the "compatible alias" trick does not work on such caches. |
397 | * Otherwise, we can always map the pages writable. |
398 | */ |
399 | |
400 | #ifdef PMAP_CACHE_VIVT |
401 | prot = VM_PROT_READ | access_type; |
402 | #else |
403 | prot = VM_PROT_READ | VM_PROT_WRITE; |
404 | #endif |
405 | |
406 | va = ufi->orig_rvaddr; |
407 | eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT); |
408 | |
409 | UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx" , va, eva, 0, 0); |
410 | |
411 | /* |
	 * Note: normally all returned pages would have the same UVM object.
	 * However, layered file-systems (and e.g. tmpfs) may return pages
	 * which belong to the underlying UVM object; in that case, the lock
	 * is shared amongst the objects.
416 | */ |
417 | mutex_enter(uobj->vmobjlock); |
418 | for (i = 0; va < eva; i++, va += PAGE_SIZE) { |
419 | struct vm_page *pg; |
420 | |
421 | UVMHIST_LOG(ubchist, "pgs[%d] = %p" , i, pgs[i], 0, 0); |
422 | pg = pgs[i]; |
423 | |
424 | if (pg == NULL || pg == PGO_DONTCARE) { |
425 | continue; |
426 | } |
427 | KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock); |
428 | error = ubc_fault_page(ufi, umap, pg, prot, access_type, va); |
429 | if (error) { |
430 | /* |
431 | * Flush (there might be pages entered), drop the lock, |
432 | * and perform uvm_wait(). Note: page will re-fault. |
433 | */ |
434 | pmap_update(ufi->orig_map->pmap); |
435 | mutex_exit(uobj->vmobjlock); |
436 | uvm_wait("ubc_fault" ); |
437 | mutex_enter(uobj->vmobjlock); |
438 | } |
439 | } |
440 | /* Must make VA visible before the unlock. */ |
441 | pmap_update(ufi->orig_map->pmap); |
442 | mutex_exit(uobj->vmobjlock); |
443 | |
444 | return 0; |
445 | } |
446 | |
447 | /* |
448 | * local functions |
449 | */ |
450 | |
451 | static struct ubc_map * |
452 | ubc_find_mapping(struct uvm_object *uobj, voff_t offset) |
453 | { |
454 | struct ubc_map *umap; |
455 | |
456 | LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) { |
457 | if (umap->uobj == uobj && umap->offset == offset) { |
458 | return umap; |
459 | } |
460 | } |
461 | return NULL; |
462 | } |
463 | |
464 | |
465 | /* |
466 | * ubc interface functions |
467 | */ |
468 | |
469 | /* |
470 | * ubc_alloc: allocate a file mapping window |
471 | */ |
472 | |
473 | void * |
474 | ubc_alloc(struct uvm_object *uobj, voff_t offset, vsize_t *lenp, int advice, |
475 | int flags) |
476 | { |
477 | vaddr_t slot_offset, va; |
478 | struct ubc_map *umap; |
479 | voff_t umap_offset; |
480 | int error; |
481 | UVMHIST_FUNC("ubc_alloc" ); UVMHIST_CALLED(ubchist); |
482 | |
483 | UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx" , |
484 | uobj, offset, *lenp, 0); |
485 | |
486 | KASSERT(*lenp > 0); |
487 | umap_offset = (offset & ~((voff_t)ubc_winsize - 1)); |
488 | slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1)); |
489 | *lenp = MIN(*lenp, ubc_winsize - slot_offset); |
490 | |
491 | mutex_enter(ubc_object.uobj.vmobjlock); |
492 | again: |
493 | /* |
494 | * The UVM object is already referenced. |
495 | * Lock order: UBC object -> ubc_map::uobj. |
496 | */ |
497 | umap = ubc_find_mapping(uobj, umap_offset); |
498 | if (umap == NULL) { |
499 | struct uvm_object *oobj; |
500 | |
501 | UBC_EVCNT_INCR(wincachemiss); |
502 | umap = TAILQ_FIRST(UBC_QUEUE(offset)); |
503 | if (umap == NULL) { |
504 | kpause("ubc_alloc" , false, hz >> 2, |
505 | ubc_object.uobj.vmobjlock); |
506 | goto again; |
507 | } |
508 | |
509 | va = UBC_UMAP_ADDR(umap); |
510 | oobj = umap->uobj; |
511 | |
512 | /* |
513 | * Remove from old hash (if any), add to new hash. |
514 | */ |
515 | |
516 | if (oobj != NULL) { |
517 | /* |
518 | * Mapping must be removed before the list entry, |
519 | * since there is a race with ubc_purge(). |
520 | */ |
521 | if (umap->flags & UMAP_MAPPING_CACHED) { |
522 | umap->flags &= ~UMAP_MAPPING_CACHED; |
523 | mutex_enter(oobj->vmobjlock); |
524 | pmap_remove(pmap_kernel(), va, |
525 | va + ubc_winsize); |
526 | pmap_update(pmap_kernel()); |
527 | mutex_exit(oobj->vmobjlock); |
528 | } |
529 | LIST_REMOVE(umap, hash); |
530 | LIST_REMOVE(umap, list); |
531 | } else { |
532 | KASSERT((umap->flags & UMAP_MAPPING_CACHED) == 0); |
533 | } |
534 | umap->uobj = uobj; |
535 | umap->offset = umap_offset; |
536 | LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)], |
537 | umap, hash); |
538 | LIST_INSERT_HEAD(&uobj->uo_ubc, umap, list); |
539 | } else { |
540 | UBC_EVCNT_INCR(wincachehit); |
541 | va = UBC_UMAP_ADDR(umap); |
542 | } |
543 | |
544 | if (umap->refcount == 0) { |
545 | TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive); |
546 | } |
547 | |
548 | if (flags & UBC_WRITE) { |
549 | KASSERTMSG(umap->writeoff == 0 && umap->writelen == 0, |
550 | "ubc_alloc: concurrent writes to uobj %p" , uobj); |
551 | umap->writeoff = slot_offset; |
552 | umap->writelen = *lenp; |
553 | } |
554 | |
555 | umap->refcount++; |
556 | umap->advice = advice; |
557 | mutex_exit(ubc_object.uobj.vmobjlock); |
558 | UVMHIST_LOG(ubchist, "umap %p refs %d va %p flags 0x%x" , |
559 | umap, umap->refcount, va, flags); |
560 | |
561 | if (flags & UBC_FAULTBUSY) { |
562 | int npages = (*lenp + PAGE_SIZE - 1) >> PAGE_SHIFT; |
563 | struct vm_page *pgs[npages]; |
564 | int gpflags = |
565 | PGO_SYNCIO|PGO_OVERWRITE|PGO_PASTEOF|PGO_NOBLOCKALLOC| |
566 | PGO_NOTIMESTAMP; |
567 | int i; |
568 | KDASSERT(flags & UBC_WRITE); |
569 | KASSERT(umap->refcount == 1); |
570 | |
571 | UBC_EVCNT_INCR(faultbusy); |
572 | again_faultbusy: |
573 | mutex_enter(uobj->vmobjlock); |
574 | if (umap->flags & UMAP_MAPPING_CACHED) { |
575 | umap->flags &= ~UMAP_MAPPING_CACHED; |
576 | pmap_remove(pmap_kernel(), va, va + ubc_winsize); |
577 | } |
578 | memset(pgs, 0, sizeof(pgs)); |
579 | |
580 | error = (*uobj->pgops->pgo_get)(uobj, trunc_page(offset), pgs, |
581 | &npages, 0, VM_PROT_READ | VM_PROT_WRITE, advice, gpflags); |
582 | UVMHIST_LOG(ubchist, "faultbusy getpages %d" , error, 0, 0, 0); |
583 | if (error) { |
584 | /* |
585 | * Flush: the mapping above might have been removed. |
586 | */ |
587 | pmap_update(pmap_kernel()); |
588 | goto out; |
589 | } |
590 | for (i = 0; i < npages; i++) { |
591 | struct vm_page *pg = pgs[i]; |
592 | |
593 | KASSERT(pg->uobject == uobj); |
594 | if (pg->loan_count != 0) { |
595 | mutex_enter(uobj->vmobjlock); |
596 | if (pg->loan_count != 0) { |
597 | pg = uvm_loanbreak(pg); |
598 | } |
599 | if (pg == NULL) { |
600 | pmap_kremove(va, ubc_winsize); |
601 | pmap_update(pmap_kernel()); |
602 | uvm_page_unbusy(pgs, npages); |
603 | mutex_exit(uobj->vmobjlock); |
604 | uvm_wait("ubc_alloc" ); |
605 | goto again_faultbusy; |
606 | } |
607 | mutex_exit(uobj->vmobjlock); |
608 | pgs[i] = pg; |
609 | } |
610 | pmap_kenter_pa(va + slot_offset + (i << PAGE_SHIFT), |
611 | VM_PAGE_TO_PHYS(pg), |
612 | VM_PROT_READ | VM_PROT_WRITE, 0); |
613 | } |
614 | pmap_update(pmap_kernel()); |
615 | umap->flags |= UMAP_PAGES_LOCKED; |
616 | } else { |
617 | KASSERT((umap->flags & UMAP_PAGES_LOCKED) == 0); |
618 | } |
619 | |
620 | out: |
621 | return (void *)(va + slot_offset); |
622 | } |
623 | |
624 | /* |
625 | * ubc_release: free a file mapping window. |
626 | */ |
627 | |
628 | void |
629 | ubc_release(void *va, int flags) |
630 | { |
631 | struct ubc_map *umap; |
632 | struct uvm_object *uobj; |
633 | vaddr_t umapva; |
634 | bool unmapped; |
635 | UVMHIST_FUNC("ubc_release" ); UVMHIST_CALLED(ubchist); |
636 | |
637 | UVMHIST_LOG(ubchist, "va %p" , va, 0, 0, 0); |
638 | umap = &ubc_object.umap[((char *)va - ubc_object.kva) >> ubc_winshift]; |
639 | umapva = UBC_UMAP_ADDR(umap); |
640 | uobj = umap->uobj; |
641 | KASSERT(uobj != NULL); |
642 | |
643 | if (umap->flags & UMAP_PAGES_LOCKED) { |
644 | const voff_t slot_offset = umap->writeoff; |
645 | const voff_t endoff = umap->writeoff + umap->writelen; |
646 | const voff_t zerolen = round_page(endoff) - endoff; |
647 | const u_int npages = (round_page(endoff) - |
648 | trunc_page(slot_offset)) >> PAGE_SHIFT; |
649 | struct vm_page *pgs[npages]; |
650 | |
651 | KASSERT((umap->flags & UMAP_MAPPING_CACHED) == 0); |
652 | if (zerolen) { |
653 | memset((char *)umapva + endoff, 0, zerolen); |
654 | } |
655 | umap->flags &= ~UMAP_PAGES_LOCKED; |
656 | mutex_enter(uobj->vmobjlock); |
657 | mutex_enter(&uvm_pageqlock); |
658 | for (u_int i = 0; i < npages; i++) { |
659 | paddr_t pa; |
660 | bool rv __diagused; |
661 | |
662 | rv = pmap_extract(pmap_kernel(), |
663 | umapva + slot_offset + (i << PAGE_SHIFT), &pa); |
664 | KASSERT(rv); |
665 | pgs[i] = PHYS_TO_VM_PAGE(pa); |
666 | pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN); |
667 | KASSERT(pgs[i]->loan_count == 0); |
668 | uvm_pageactivate(pgs[i]); |
669 | } |
670 | mutex_exit(&uvm_pageqlock); |
671 | pmap_kremove(umapva, ubc_winsize); |
672 | pmap_update(pmap_kernel()); |
673 | uvm_page_unbusy(pgs, npages); |
674 | mutex_exit(uobj->vmobjlock); |
675 | unmapped = true; |
676 | } else { |
677 | unmapped = false; |
678 | } |
679 | |
680 | mutex_enter(ubc_object.uobj.vmobjlock); |
681 | umap->writeoff = 0; |
682 | umap->writelen = 0; |
683 | umap->refcount--; |
684 | if (umap->refcount == 0) { |
685 | if (flags & UBC_UNMAP) { |
686 | /* |
687 | * Invalidate any cached mappings if requested. |
688 | * This is typically used to avoid leaving |
689 | * incompatible cache aliases around indefinitely. |
690 | */ |
691 | mutex_enter(uobj->vmobjlock); |
692 | pmap_remove(pmap_kernel(), umapva, |
693 | umapva + ubc_winsize); |
694 | pmap_update(pmap_kernel()); |
695 | mutex_exit(uobj->vmobjlock); |
696 | |
697 | umap->flags &= ~UMAP_MAPPING_CACHED; |
698 | LIST_REMOVE(umap, hash); |
699 | LIST_REMOVE(umap, list); |
700 | umap->uobj = NULL; |
701 | TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, |
702 | inactive); |
703 | } else { |
704 | if (!unmapped) { |
705 | umap->flags |= UMAP_MAPPING_CACHED; |
706 | } |
707 | TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap, |
708 | inactive); |
709 | } |
710 | } |
711 | UVMHIST_LOG(ubchist, "umap %p refs %d" , umap, umap->refcount, 0, 0); |
712 | mutex_exit(ubc_object.uobj.vmobjlock); |
713 | } |
714 | |
715 | /* |
716 | * ubc_uiomove: move data to/from an object. |
717 | */ |
718 | |
719 | int |
720 | ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo, int advice, |
721 | int flags) |
722 | { |
723 | const bool overwrite = (flags & UBC_FAULTBUSY) != 0; |
724 | voff_t off; |
725 | int error; |
726 | |
727 | KASSERT(todo <= uio->uio_resid); |
728 | KASSERT(((flags & UBC_WRITE) != 0 && uio->uio_rw == UIO_WRITE) || |
729 | ((flags & UBC_READ) != 0 && uio->uio_rw == UIO_READ)); |
730 | |
731 | off = uio->uio_offset; |
732 | error = 0; |
733 | while (todo > 0) { |
734 | vsize_t bytelen = todo; |
735 | void *win; |
736 | |
737 | win = ubc_alloc(uobj, off, &bytelen, advice, flags); |
738 | if (error == 0) { |
739 | error = uiomove(win, bytelen, uio); |
740 | } |
741 | if (error != 0 && overwrite) { |
742 | /* |
743 | * if we haven't initialized the pages yet, |
744 | * do it now. it's safe to use memset here |
745 | * because we just mapped the pages above. |
746 | */ |
747 | printf("%s: error=%d\n" , __func__, error); |
748 | memset(win, 0, bytelen); |
749 | } |
750 | ubc_release(win, flags); |
751 | off += bytelen; |
752 | todo -= bytelen; |
753 | if (error != 0 && (flags & UBC_PARTIALOK) != 0) { |
754 | break; |
755 | } |
756 | } |
757 | |
758 | return error; |
759 | } |
760 | |
761 | /* |
762 | * ubc_zerorange: set a range of bytes in an object to zero. |
763 | */ |
764 | |
765 | void |
766 | ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags) |
767 | { |
768 | void *win; |
769 | |
770 | /* |
771 | * XXXUBC invent kzero() and use it |
772 | */ |
773 | |
774 | while (len) { |
775 | vsize_t bytelen = len; |
776 | |
777 | win = ubc_alloc(uobj, off, &bytelen, UVM_ADV_NORMAL, UBC_WRITE); |
778 | memset(win, 0, bytelen); |
779 | ubc_release(win, flags); |
780 | |
781 | off += bytelen; |
782 | len -= bytelen; |
783 | } |
784 | } |
785 | |
786 | /* |
787 | * ubc_purge: disassociate ubc_map structures from an empty uvm_object. |
788 | */ |
789 | |
790 | void |
791 | ubc_purge(struct uvm_object *uobj) |
792 | { |
793 | struct ubc_map *umap; |
794 | vaddr_t va; |
795 | |
796 | KASSERT(uobj->uo_npages == 0); |
797 | |
798 | /* |
799 | * Safe to check without lock held, as ubc_alloc() removes |
800 | * the mapping and list entry in the correct order. |
801 | */ |
802 | if (__predict_true(LIST_EMPTY(&uobj->uo_ubc))) { |
803 | return; |
804 | } |
805 | mutex_enter(ubc_object.uobj.vmobjlock); |
806 | while ((umap = LIST_FIRST(&uobj->uo_ubc)) != NULL) { |
807 | KASSERT(umap->refcount == 0); |
808 | for (va = 0; va < ubc_winsize; va += PAGE_SIZE) { |
809 | KASSERT(!pmap_extract(pmap_kernel(), |
810 | va + UBC_UMAP_ADDR(umap), NULL)); |
811 | } |
812 | LIST_REMOVE(umap, list); |
813 | LIST_REMOVE(umap, hash); |
814 | umap->flags &= ~UMAP_MAPPING_CACHED; |
815 | umap->uobj = NULL; |
816 | } |
817 | mutex_exit(ubc_object.uobj.vmobjlock); |
818 | } |
819 | |