/* $NetBSD: uvm_mmap.c,v 1.162 2016/08/09 12:17:04 kre Exp $ */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993 The Regents of the University of California.
 * Copyright (c) 1988 University of Utah.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *	@(#)vm_mmap.c	8.5 (Berkeley) 5/19/94
 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
 */

/*
 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
 * function.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.162 2016/08/09 12:17:04 kre Exp $");

#include "opt_compat_netbsd.h"
#include "opt_pax.h"

#include <sys/types.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/pax.h>

#include <sys/syscallargs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_device.h>

static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
    int, int, struct uvm_object *, voff_t, vsize_t);

static int
range_test(struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
{
	vaddr_t vm_min_address = vm_map_min(map);
	vaddr_t vm_max_address = vm_map_max(map);
	vaddr_t eaddr = addr + size;
	int res = 0;

	if (addr < vm_min_address)
		return EINVAL;
	if (eaddr > vm_max_address)
		return ismmap ? EFBIG : EINVAL;
	if (addr > eaddr)		/* no wrapping! */
		return ismmap ? EOVERFLOW : EINVAL;

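	/*
	 * Optionally let machine-dependent code apply a further check on
	 * the range (ports that need one define MD_MMAP_RANGE_TEST).
	 */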
#ifdef MD_MMAP_RANGE_TEST
	res = MD_MMAP_RANGE_TEST(addr, eaddr);
#endif

	return res;
}

/*
 * unimplemented VM system calls:
 */

/*
 * sys_sbrk: sbrk system call.
 */

/* ARGSUSED */
int
sys_sbrk(struct lwp *l, const struct sys_sbrk_args *uap, register_t *retval)
{
	/* {
		syscallarg(intptr_t) incr;
	} */

	return ENOSYS;
}

/*
 * sys_sstk: sstk system call.
 */

/* ARGSUSED */
int
sys_sstk(struct lwp *l, const struct sys_sstk_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) incr;
	} */

	return ENOSYS;
}

/*
 * sys_mincore: determine if pages are in core or not.
 */

/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

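	/* The address must be page aligned; round the length up to pages. */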
	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so that storing a status byte cannot fault;
	 * such a fault could change page residency and outdate the
	 * status bytes already stored.
	 */

	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

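	/*
	 * Walk the map entries covering [start, end), storing one status
	 * byte per page.
	 */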
	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */

		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {

					/*
					 * Anon has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {

					/*
					 * Object has the page for this entry
					 * offset.
					 */

					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}

/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have the same remainder
 *      modulo PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	off_t pos;
	vsize_t size, pageoff, newsize;
	vm_prot_t prot, maxprot;
	int flags, fd, advice;
	vaddr_t defaddr;
	struct file *fp = NULL;
	struct uvm_object *uobj;
	int error;
#ifdef PAX_ASLR
	vaddr_t orig_addr;
#endif /* PAX_ASLR */

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

#ifdef PAX_ASLR
	orig_addr = addr;
#endif /* PAX_ASLR */

	/*
	 * Fix up the old, deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if (flags & MAP_COPY) {
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
#if defined(COMPAT_10) && defined(__i386__)
		/*
		 * Ancient i386 kernels did not obey PROT_EXEC, and ld.so
		 * did not turn it on.  We take care of this on amd64 in
		 * the compat32 code.
		 */
		prot |= PROT_EXEC;
#endif
	}
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return EINVAL;

	/*
	 * align file position and save offset.  adjust size.
	 */

	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;
	newsize = size + pageoff;		/* add offset */
	newsize = (vsize_t)round_page(newsize);	/* round up */

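	/* Fail if the offset/rounding adjustment overflowed the size. */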
	if (newsize < size)
		return ENOMEM;
	size = newsize;

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */
	if (flags & MAP_FIXED) {
		/* ensure address and file offset are aligned properly */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return EINVAL;

		error = range_test(&p->p_vmspace->vm_map, addr, size, true);
		if (error) {
			return error;
		}
	} else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
		/*
		 * not fixed: make sure we skip over the largest
		 * possible heap for non-topdown mapping arrangements.
		 * we will refine our guess later (e.g. to account for
		 * VAC, etc)
		 */

		defaddr = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

		if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
			addr = MAX(addr, defaddr);
		else
			addr = MIN(addr, defaddr);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */

	advice = UVM_ADV_NORMAL;
	if ((flags & MAP_ANON) == 0) {
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		if (fp->f_ops->fo_mmap == NULL) {
			error = ENODEV;
			goto out;
		}
		error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
		    &advice, &uobj, &maxprot);
		if (error) {
			goto out;
		}
		if (uobj == NULL) {
			flags |= MAP_ANON;
			fd_putfile(fd);
			fp = NULL;
			goto is_anon;
		}
	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1)
			return EINVAL;

 is_anon:	/* label for SunOS style /dev/zero */
		uobj = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

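	/*
	 * Apply the PaX mprotect restrictions to prot/maxprot, and let
	 * PaX ASLR adjust the address hint if it is enabled.
	 */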
	PAX_MPROTECT_ADJUST(l, &prot, &maxprot);

	pax_aslr_mmap(l, &addr, orig_addr, flags);

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);

	/* remember to add offset */
	*retval = (register_t)(addr + pageoff);

 out:
	if (fp != NULL)
		fd_putfile(fd);

	return error;
}

/*
 * sys___msync13: the msync system call (a front-end for flush)
 */

int
sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	int error, flags, uvmflags;
	bool rv;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return EINVAL;
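
	/* If neither sync flag was given, default to a synchronous flush. */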
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	/*
	 * get map
	 */
	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return ENOMEM;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */

	if (size == 0) {
		struct vm_map_entry *entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == true) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == false)
			return EINVAL;
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */

	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;

	error = uvm_map_clean(map, addr, addr + size, uvmflags);
	return error;
}

/*
 * sys_munmap: unmap a user's memory
 */

int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	struct vm_map *map;
	struct vm_map_entry *dead_entries;
	int error;

	/*
	 * get syscall args.
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	if (size == 0)
		return 0;

	map = &p->p_vmspace->vm_map;

	error = range_test(map, addr, size, false);
	if (error)
		return EINVAL;

	vm_map_lock(map);
#if 0
	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return EINVAL;
	}
#endif
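	/*
	 * Remove the mappings with the map locked; detach the dead
	 * entries only after the lock has been dropped.
	 */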
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
	vm_map_unlock(map);
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return 0;
}

/*
 * sys_mprotect: the mprotect system call
 */

int
sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_prot_t prot;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	prot = SCARG(uap, prot) & VM_PROT_ALL;

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
	    false);
	return error;
}

/*
 * sys_minherit: the minherit system call
 */

int
sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(int) len;
		syscallarg(int) inherit;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_inherit_t inherit;
	int error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	inherit = SCARG(uap, inherit);

	/*
	 * align the address to a page boundary and adjust the size accordingly.
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
	    inherit);
	return error;
}

/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return EINVAL;

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
		    advice);
		break;

	case MADV_WILLNEED:

		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */

		error = uvm_map_willneed(&p->p_vmspace->vm_map,
		    addr, addr + size);
		break;

	case MADV_DONTNEED:

		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:

		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */

		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:

		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */

		return EINVAL;

	default:
		return EINVAL;
	}

	return error;
}

/*
 * sys_mlock: memory lock
 */

int
sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

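	/*
	 * Enforce the system-wide wired-page limit and the per-process
	 * RLIMIT_MEMLOCK limit before wiring anything.
	 */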
	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
		return EAGAIN;

	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return EAGAIN;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size,
	    false, 0);
	if (error == EFAULT)
		error = ENOMEM;
	return error;
}

/*
 * sys_munlock: unlock wired pages
 */

int
sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const void *) addr;
		syscallarg(size_t) len;
	} */
	struct proc *p = l->l_proc;
	vaddr_t addr;
	vsize_t size, pageoff;
	int error;

	/*
	 * extract syscall args from uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vsize_t)round_page(size);

	error = range_test(&p->p_vmspace->vm_map, addr, size, false);
	if (error)
		return ENOMEM;

	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size,
	    true, 0);
	if (error)
		return ENOMEM;

	return 0;
}

/*
 * sys_mlockall: lock all pages mapped into an address space.
 */

int
sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct proc *p = l->l_proc;
	int error, flags;

	flags = SCARG(uap, flags);

	if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
		return EINVAL;

	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

/*
 * sys_munlockall: unlock all pages mapped into an address space.
 */

int
sys_munlockall(struct lwp *l, const void *v, register_t *retval)
{
	struct proc *p = l->l_proc;

	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
	return 0;
}

/*
 * uvm_mmap: internal version of mmap
 *
 * - used by sys_mmap and various framebuffers
 * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
 * - caller must page-align the file offset
 */

int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	int error;
	uvm_flag_t uvmflag = 0;

	/*
	 * check params
	 */

	if (size == 0)
		return 0;
	if (foff & PAGE_MASK)
		return EINVAL;
	if ((prot & maxprot) != prot)
		return EINVAL;

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);
	} else {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	}

	/*
	 * Try to see if any requested alignment can even be attempted.
	 * Make sure we can express the alignment (asking for a >= 4GB
	 * alignment on an ILP32 architecture makes no sense) and that
	 * it is at least a page-sized quantity.  If the request was for
	 * a fixed mapping, make sure the supplied address adheres to
	 * the requested alignment.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1L << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align - 1)) != 0)
				return EINVAL;
			align = 0;
		}
	}

	/*
	 * check resource limits
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	     curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * handle anon vs. non-anon mappings.  for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0) {
			uvmflag |= UVM_FLAG_COPYONW;
		}
	}

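	/*
	 * Merge the protections, inheritance mode and advice into the
	 * final flag word for uvm_map().
	 */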
	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 *
	 * Also handle the MAP_WIRED flag here.
	 */

	if (prot == VM_PROT_NONE) {

		/*
		 * No more work to do in this case.
		 */

		return 0;
	}
	if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
		vm_map_lock(map);
		if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
		    (locklimit != 0 &&
		     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
		     locklimit)) {
			vm_map_unlock(map);
			uvm_unmap(map, *addr, *addr + size);
			return ENOMEM;
		}

		/*
		 * uvm_map_pageable() always returns the map unlocked.
		 */

		error = uvm_map_pageable(map, *addr, *addr + size,
		    false, UVM_LK_ENTER);
		if (error) {
			uvm_unmap(map, *addr, *addr + size);
			return error;
		}
		return 0;
	}
	return 0;
}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	if (topdown)
		return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
	else
		return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
}

int
uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
    off_t off)
{
	struct uvm_object *uobj;
	int error, flags, prot;

	flags = MAP_SHARED;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

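	/*
	 * Attach the device pager; this fails if the device cannot be
	 * mapped at this offset with these protections.
	 */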
	uobj = udv_attach(dev, prot, off, len);
	if (uobj == NULL)
		return EINVAL;

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}

int
uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
{
	int error, flags, prot;

	flags = MAP_PRIVATE | MAP_ANON;
	prot = VM_PROT_READ | VM_PROT_WRITE;
	if (*addrp)
		flags |= MAP_FIXED;
	else
		*addrp = (void *)p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)p->p_vmspace->vm_daddr, len,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

	error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
	    (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
	return error;
}