1 | /* $NetBSD: uvm_fault.c,v 1.197 2015/06/22 06:24:17 matt Exp $ */ |
2 | |
3 | /* |
4 | * Copyright (c) 1997 Charles D. Cranor and Washington University. |
5 | * All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions |
9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
17 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
18 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
19 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
21 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
22 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
23 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
25 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | * |
27 | * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp |
28 | */ |
29 | |
30 | /* |
31 | * uvm_fault.c: fault handler |
32 | */ |
33 | |
34 | #include <sys/cdefs.h> |
35 | __KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.197 2015/06/22 06:24:17 matt Exp $" ); |
36 | |
37 | #include "opt_uvmhist.h" |
38 | |
39 | #include <sys/param.h> |
40 | #include <sys/systm.h> |
41 | #include <sys/kernel.h> |
42 | #include <sys/mman.h> |
43 | |
44 | #include <uvm/uvm.h> |
45 | |
46 | /* |
47 | * |
48 | * a word on page faults: |
49 | * |
50 | * types of page faults we handle: |
51 | * |
52 | * CASE 1: upper layer faults CASE 2: lower layer faults |
53 | * |
54 | * CASE 1A CASE 1B CASE 2A CASE 2B |
55 | * read/write1 write>1 read/write +-cow_write/zero |
56 | * | | | | |
57 | * +--|--+ +--|--+ +-----+ + | + | +-----+ |
58 | * amap | V | | ---------> new | | | | ^ | |
59 | * +-----+ +-----+ +-----+ + | + | +--|--+ |
60 | * | | | |
61 | * +-----+ +-----+ +--|--+ | +--|--+ |
62 | * uobj | d/c | | d/c | | V | +----+ | |
63 | * +-----+ +-----+ +-----+ +-----+ |
64 | * |
65 | * d/c = don't care |
66 | * |
67 | * case [0]: layerless fault |
68 | * no amap or uobj is present. this is an error. |
69 | * |
70 | * case [1]: upper layer fault [anon active] |
71 | * 1A: [read] or [write with anon->an_ref == 1] |
72 | * I/O takes place in upper level anon and uobj is not touched. |
73 | * 1B: [write with anon->an_ref > 1] |
74 | * new anon is alloc'd and data is copied off ["COW"] |
75 | * |
76 | * case [2]: lower layer fault [uobj] |
77 | * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area] |
78 | * I/O takes place directly in object. |
79 | * 2B: [write to copy_on_write] or [read on NULL uobj] |
80 | * data is "promoted" from uobj to a new anon. |
81 | * if uobj is null, then we zero fill. |
82 | * |
83 | * we follow the standard UVM locking protocol ordering: |
84 | * |
85 | * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) |
86 | * we hold a PG_BUSY page if we unlock for I/O |
87 | * |
88 | * |
89 | * the code is structured as follows: |
90 | * |
91 | * - init the "IN" params in the ufi structure |
92 | * ReFault: (ERESTART returned to the loop in uvm_fault_internal) |
93 | * - do lookups [locks maps], check protection, handle needs_copy |
94 | * - check for case 0 fault (error) |
95 | * - establish "range" of fault |
96 | * - if we have an amap lock it and extract the anons |
97 | * - if sequential advice deactivate pages behind us |
98 | * - at the same time check pmap for unmapped areas and anon for pages |
 *	that we could map in (and map them in if found)
100 | * - check object for resident pages that we could map in |
101 | * - if (case 2) goto Case2 |
102 | * - >>> handle case 1 |
103 | * - ensure source anon is resident in RAM |
104 | * - if case 1B alloc new anon and copy from source |
105 | * - map the correct page in |
106 | * Case2: |
107 | * - >>> handle case 2 |
108 | * - ensure source page is resident (if uobj) |
109 | * - if case 2B alloc new anon and copy from source (could be zero |
110 | * fill if uobj == NULL) |
111 | * - map the correct page in |
112 | * - done! |
113 | * |
114 | * note on paging: |
115 | * if we have to do I/O we place a PG_BUSY page in the correct object, |
116 | * unlock everything, and do the I/O. when I/O is done we must reverify |
117 | * the state of the world before assuming that our data structures are |
118 | * valid. [because mappings could change while the map is unlocked] |
119 | * |
120 | * alternative 1: unbusy the page in question and restart the page fault |
121 | * from the top (ReFault). this is easy but does not take advantage |
122 | * of the information that we already have from our previous lookup, |
123 | * although it is possible that the "hints" in the vm_map will help here. |
124 | * |
125 | * alternative 2: the system already keeps track of a "version" number of |
126 | * a map. [i.e. every time you write-lock a map (e.g. to change a |
127 | * mapping) you bump the version number up by one...] so, we can save |
128 | * the version number of the map before we release the lock and start I/O. |
129 | * then when I/O is done we can relock and check the version numbers |
 *	to see if anything changed.    this might be a win over alternative 1
 *	because we don't have to unbusy the page and it may require fewer
 *	comparisons(?).
132 | * |
133 | * alternative 3: put in backpointers or a way to "hold" part of a map |
134 | * in place while I/O is in progress. this could be complex to |
135 | * implement (especially with structures like amap that can be referenced |
136 | * by multiple map entries, and figuring out what should wait could be |
137 | * complex as well...). |
138 | * |
139 | * we use alternative 2. given that we are multi-threaded now we may want |
140 | * to reconsider the choice. |
141 | */ |
142 | |
143 | /* |
144 | * local data structures |
145 | */ |
146 | |
147 | struct uvm_advice { |
148 | int advice; |
149 | int nback; |
150 | int nforw; |
151 | }; |
152 | |
153 | /* |
154 | * page range array: |
155 | * note: index in array must match "advice" value |
156 | * XXX: borrowed numbers from freebsd. do they work well for us? |
157 | */ |
158 | |
159 | static const struct uvm_advice uvmadvice[] = { |
160 | { UVM_ADV_NORMAL, 3, 4 }, |
161 | { UVM_ADV_RANDOM, 0, 0 }, |
162 | { UVM_ADV_SEQUENTIAL, 8, 7}, |
163 | }; |
164 | |
165 | #define UVM_MAXRANGE 16 /* must be MAX() of nback+nforw+1 */ |
166 | |
167 | /* |
168 | * private prototypes |
169 | */ |
170 | |
171 | /* |
172 | * externs from other modules |
173 | */ |
174 | |
175 | extern int start_init_exec; /* Is init_main() done / init running? */ |
176 | |
177 | /* |
178 | * inline functions |
179 | */ |
180 | |
181 | /* |
182 | * uvmfault_anonflush: try and deactivate pages in specified anons |
183 | * |
184 | * => does not have to deactivate page if it is busy |
185 | */ |
186 | |
187 | static inline void |
188 | uvmfault_anonflush(struct vm_anon **anons, int n) |
189 | { |
190 | int lcv; |
191 | struct vm_page *pg; |
192 | |
193 | for (lcv = 0; lcv < n; lcv++) { |
194 | if (anons[lcv] == NULL) |
195 | continue; |
196 | KASSERT(mutex_owned(anons[lcv]->an_lock)); |
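		/* deactivate the anon's resident page unless it is busy or wired */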
197 | pg = anons[lcv]->an_page; |
198 | if (pg && (pg->flags & PG_BUSY) == 0) { |
199 | mutex_enter(&uvm_pageqlock); |
200 | if (pg->wire_count == 0) { |
201 | uvm_pagedeactivate(pg); |
202 | } |
203 | mutex_exit(&uvm_pageqlock); |
204 | } |
205 | } |
206 | } |
207 | |
208 | /* |
209 | * normal functions |
210 | */ |
211 | |
212 | /* |
213 | * uvmfault_amapcopy: clear "needs_copy" in a map. |
214 | * |
215 | * => called with VM data structures unlocked (usually, see below) |
216 | * => we get a write lock on the maps and clear needs_copy for a VA |
217 | * => if we are out of RAM we sleep (waiting for more) |
218 | */ |
219 | |
220 | static void |
221 | uvmfault_amapcopy(struct uvm_faultinfo *ufi) |
222 | { |
223 | for (;;) { |
224 | |
225 | /* |
226 | * no mapping? give up. |
227 | */ |
228 | |
229 | if (uvmfault_lookup(ufi, true) == false) |
230 | return; |
231 | |
232 | /* |
233 | * copy if needed. |
234 | */ |
235 | |
236 | if (UVM_ET_ISNEEDSCOPY(ufi->entry)) |
237 | amap_copy(ufi->map, ufi->entry, AMAP_COPY_NOWAIT, |
238 | ufi->orig_rvaddr, ufi->orig_rvaddr + 1); |
239 | |
240 | /* |
241 | * didn't work? must be out of RAM. unlock and sleep. |
242 | */ |
243 | |
244 | if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { |
245 | uvmfault_unlockmaps(ufi, true); |
246 | uvm_wait("fltamapcopy" ); |
247 | continue; |
248 | } |
249 | |
250 | /* |
251 | * got it! unlock and return. |
252 | */ |
253 | |
254 | uvmfault_unlockmaps(ufi, true); |
255 | return; |
256 | } |
257 | /*NOTREACHED*/ |
258 | } |
259 | |
260 | /* |
261 | * uvmfault_anonget: get data in an anon into a non-busy, non-released |
262 | * page in that anon. |
263 | * |
264 | * => Map, amap and thus anon should be locked by caller. |
265 | * => If we fail, we unlock everything and error is returned. |
266 | * => If we are successful, return with everything still locked. |
267 | * => We do not move the page on the queues [gets moved later]. If we |
268 | * allocate a new page [we_own], it gets put on the queues. Either way, |
269 | * the result is that the page is on the queues at return time |
270 | * => For pages which are on loan from a uvm_object (and thus are not owned |
271 | * by the anon): if successful, return with the owning object locked. |
272 | * The caller must unlock this object when it unlocks everything else. |
273 | */ |
274 | |
275 | int |
276 | uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, |
277 | struct vm_anon *anon) |
278 | { |
279 | struct vm_page *pg; |
280 | int error; |
281 | |
282 | UVMHIST_FUNC("uvmfault_anonget" ); UVMHIST_CALLED(maphist); |
283 | KASSERT(mutex_owned(anon->an_lock)); |
284 | KASSERT(anon->an_lock == amap->am_lock); |
285 | |
	/* Increment the counters. */
287 | uvmexp.fltanget++; |
288 | if (anon->an_page) { |
289 | curlwp->l_ru.ru_minflt++; |
290 | } else { |
291 | curlwp->l_ru.ru_majflt++; |
292 | } |
293 | error = 0; |
294 | |
295 | /* |
296 | * Loop until we get the anon data, or fail. |
297 | */ |
298 | |
299 | for (;;) { |
300 | bool we_own, locked; |
301 | /* |
302 | * Note: 'we_own' will become true if we set PG_BUSY on a page. |
303 | */ |
304 | we_own = false; |
305 | pg = anon->an_page; |
306 | |
307 | /* |
308 | * If there is a resident page and it is loaned, then anon |
309 | * may not own it. Call out to uvm_anon_lockloanpg() to |
310 | * identify and lock the real owner of the page. |
311 | */ |
312 | |
313 | if (pg && pg->loan_count) |
314 | pg = uvm_anon_lockloanpg(anon); |
315 | |
316 | /* |
317 | * Is page resident? Make sure it is not busy/released. |
318 | */ |
319 | |
320 | if (pg) { |
321 | |
322 | /* |
323 | * at this point, if the page has a uobject [meaning |
324 | * we have it on loan], then that uobject is locked |
325 | * by us! if the page is busy, we drop all the |
326 | * locks (including uobject) and try again. |
327 | */ |
328 | |
329 | if ((pg->flags & PG_BUSY) == 0) { |
330 | UVMHIST_LOG(maphist, "<- OK" ,0,0,0,0); |
331 | return 0; |
332 | } |
333 | pg->flags |= PG_WANTED; |
334 | uvmexp.fltpgwait++; |
335 | |
336 | /* |
337 | * The last unlock must be an atomic unlock and wait |
338 | * on the owner of page. |
339 | */ |
340 | |
341 | if (pg->uobject) { |
342 | /* Owner of page is UVM object. */ |
343 | uvmfault_unlockall(ufi, amap, NULL); |
344 | UVMHIST_LOG(maphist, " unlock+wait on uobj" ,0, |
345 | 0,0,0); |
346 | UVM_UNLOCK_AND_WAIT(pg, |
347 | pg->uobject->vmobjlock, |
348 | false, "anonget1" , 0); |
349 | } else { |
350 | /* Owner of page is anon. */ |
351 | uvmfault_unlockall(ufi, NULL, NULL); |
352 | UVMHIST_LOG(maphist, " unlock+wait on anon" ,0, |
353 | 0,0,0); |
354 | UVM_UNLOCK_AND_WAIT(pg, anon->an_lock, |
355 | false, "anonget2" , 0); |
356 | } |
357 | } else { |
358 | #if defined(VMSWAP) |
359 | /* |
360 | * No page, therefore allocate one. |
361 | */ |
362 | |
363 | pg = uvm_pagealloc(NULL, |
364 | ufi != NULL ? ufi->orig_rvaddr : 0, |
365 | anon, ufi != NULL ? UVM_FLAG_COLORMATCH : 0); |
366 | if (pg == NULL) { |
367 | /* Out of memory. Wait a little. */ |
368 | uvmfault_unlockall(ufi, amap, NULL); |
369 | uvmexp.fltnoram++; |
370 | UVMHIST_LOG(maphist, " noram -- UVM_WAIT" ,0, |
371 | 0,0,0); |
372 | if (!uvm_reclaimable()) { |
373 | return ENOMEM; |
374 | } |
375 | uvm_wait("flt_noram1" ); |
376 | } else { |
377 | /* PG_BUSY bit is set. */ |
378 | we_own = true; |
379 | uvmfault_unlockall(ufi, amap, NULL); |
380 | |
381 | /* |
382 | * Pass a PG_BUSY+PG_FAKE+PG_CLEAN page into |
383 | * the uvm_swap_get() function with all data |
384 | * structures unlocked. Note that it is OK |
385 | * to read an_swslot here, because we hold |
386 | * PG_BUSY on the page. |
387 | */ |
388 | uvmexp.pageins++; |
389 | error = uvm_swap_get(pg, anon->an_swslot, |
390 | PGO_SYNCIO); |
391 | |
392 | /* |
393 | * We clean up after the I/O below in the |
394 | * 'we_own' case. |
395 | */ |
396 | } |
397 | #else |
398 | panic("%s: no page" , __func__); |
399 | #endif /* defined(VMSWAP) */ |
400 | } |
401 | |
402 | /* |
403 | * Re-lock the map and anon. |
404 | */ |
405 | |
406 | locked = uvmfault_relock(ufi); |
407 | if (locked || we_own) { |
408 | mutex_enter(anon->an_lock); |
409 | } |
410 | |
411 | /* |
412 | * If we own the page (i.e. we set PG_BUSY), then we need |
413 | * to clean up after the I/O. There are three cases to |
414 | * consider: |
415 | * |
416 | * 1) Page was released during I/O: free anon and ReFault. |
417 | * 2) I/O not OK. Free the page and cause the fault to fail. |
418 | * 3) I/O OK! Activate the page and sync with the non-we_own |
419 | * case (i.e. drop anon lock if not locked). |
420 | */ |
421 | |
422 | if (we_own) { |
423 | #if defined(VMSWAP) |
424 | if (pg->flags & PG_WANTED) { |
425 | wakeup(pg); |
426 | } |
427 | if (error) { |
428 | |
429 | /* |
430 | * Remove the swap slot from the anon and |
431 | * mark the anon as having no real slot. |
432 | * Do not free the swap slot, thus preventing |
433 | * it from being used again. |
434 | */ |
435 | |
436 | if (anon->an_swslot > 0) { |
437 | uvm_swap_markbad(anon->an_swslot, 1); |
438 | } |
439 | anon->an_swslot = SWSLOT_BAD; |
440 | |
441 | if ((pg->flags & PG_RELEASED) != 0) { |
442 | goto released; |
443 | } |
444 | |
445 | /* |
446 | * Note: page was never !PG_BUSY, so it |
447 | * cannot be mapped and thus no need to |
448 | * pmap_page_protect() it. |
449 | */ |
450 | |
451 | mutex_enter(&uvm_pageqlock); |
452 | uvm_pagefree(pg); |
453 | mutex_exit(&uvm_pageqlock); |
454 | |
455 | if (locked) { |
456 | uvmfault_unlockall(ufi, NULL, NULL); |
457 | } |
458 | mutex_exit(anon->an_lock); |
459 | UVMHIST_LOG(maphist, "<- ERROR" , 0,0,0,0); |
460 | return error; |
461 | } |
462 | |
463 | if ((pg->flags & PG_RELEASED) != 0) { |
464 | released: |
465 | KASSERT(anon->an_ref == 0); |
466 | |
467 | /* |
468 | * Released while we had unlocked amap. |
469 | */ |
470 | |
471 | if (locked) { |
472 | uvmfault_unlockall(ufi, NULL, NULL); |
473 | } |
474 | uvm_anon_release(anon); |
475 | |
476 | if (error) { |
477 | UVMHIST_LOG(maphist, |
478 | "<- ERROR/RELEASED" , 0,0,0,0); |
479 | return error; |
480 | } |
481 | |
482 | UVMHIST_LOG(maphist, "<- RELEASED" , 0,0,0,0); |
483 | return ERESTART; |
484 | } |
485 | |
486 | /* |
487 | * We have successfully read the page, activate it. |
488 | */ |
489 | |
490 | mutex_enter(&uvm_pageqlock); |
491 | uvm_pageactivate(pg); |
492 | mutex_exit(&uvm_pageqlock); |
493 | pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); |
494 | UVM_PAGE_OWN(pg, NULL); |
495 | #else |
496 | panic("%s: we_own" , __func__); |
497 | #endif /* defined(VMSWAP) */ |
498 | } |
499 | |
500 | /* |
501 | * We were not able to re-lock the map - restart the fault. |
502 | */ |
503 | |
504 | if (!locked) { |
505 | if (we_own) { |
506 | mutex_exit(anon->an_lock); |
507 | } |
508 | UVMHIST_LOG(maphist, "<- REFAULT" , 0,0,0,0); |
509 | return ERESTART; |
510 | } |
511 | |
512 | /* |
513 | * Verify that no one has touched the amap and moved |
514 | * the anon on us. |
515 | */ |
516 | |
517 | if (ufi != NULL && amap_lookup(&ufi->entry->aref, |
518 | ufi->orig_rvaddr - ufi->entry->start) != anon) { |
519 | |
520 | uvmfault_unlockall(ufi, amap, NULL); |
521 | UVMHIST_LOG(maphist, "<- REFAULT" , 0,0,0,0); |
522 | return ERESTART; |
523 | } |
524 | |
525 | /* |
526 | * Retry.. |
527 | */ |
528 | |
529 | uvmexp.fltanretry++; |
530 | continue; |
531 | } |
532 | /*NOTREACHED*/ |
533 | } |
534 | |
535 | /* |
536 | * uvmfault_promote: promote data to a new anon. used for 1B and 2B. |
537 | * |
538 | * 1. allocate an anon and a page. |
539 | * 2. fill its contents. |
540 | * 3. put it into amap. |
541 | * |
542 | * => if we fail (result != 0) we unlock everything. |
543 | * => on success, return a new locked anon via 'nanon'. |
544 | * (*nanon)->an_page will be a resident, locked, dirty page. |
 * => it is the caller's responsibility to put the promoted nanon->an_page
 *    on the page queue.
547 | */ |
548 | |
549 | static int |
550 | uvmfault_promote(struct uvm_faultinfo *ufi, |
551 | struct vm_anon *oanon, |
552 | struct vm_page *uobjpage, |
553 | struct vm_anon **nanon, /* OUT: allocated anon */ |
554 | struct vm_anon **spare) |
555 | { |
556 | struct vm_amap *amap = ufi->entry->aref.ar_amap; |
557 | struct uvm_object *uobj; |
558 | struct vm_anon *anon; |
559 | struct vm_page *pg; |
560 | struct vm_page *opg; |
561 | int error; |
562 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); |
563 | |
564 | if (oanon) { |
565 | /* anon COW */ |
566 | opg = oanon->an_page; |
567 | KASSERT(opg != NULL); |
568 | KASSERT(opg->uobject == NULL || opg->loan_count > 0); |
569 | } else if (uobjpage != PGO_DONTCARE) { |
570 | /* object-backed COW */ |
571 | opg = uobjpage; |
572 | } else { |
573 | /* ZFOD */ |
574 | opg = NULL; |
575 | } |
576 | if (opg != NULL) { |
577 | uobj = opg->uobject; |
578 | } else { |
579 | uobj = NULL; |
580 | } |
581 | |
582 | KASSERT(amap != NULL); |
583 | KASSERT(uobjpage != NULL); |
584 | KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0); |
585 | KASSERT(mutex_owned(amap->am_lock)); |
586 | KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock); |
587 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
588 | |
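	/* reuse the spare anon saved by an earlier attempt, if we have one. */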
589 | if (*spare != NULL) { |
590 | anon = *spare; |
591 | *spare = NULL; |
592 | } else { |
593 | anon = uvm_analloc(); |
594 | } |
595 | if (anon) { |
596 | |
597 | /* |
598 | * The new anon is locked. |
599 | * |
600 | * if opg == NULL, we want a zero'd, dirty page, |
601 | * so have uvm_pagealloc() do that for us. |
602 | */ |
603 | |
604 | KASSERT(anon->an_lock == NULL); |
605 | anon->an_lock = amap->am_lock; |
606 | pg = uvm_pagealloc(NULL, ufi->orig_rvaddr, anon, |
607 | UVM_FLAG_COLORMATCH | (opg == NULL ? UVM_PGA_ZERO : 0)); |
608 | if (pg == NULL) { |
609 | anon->an_lock = NULL; |
610 | } |
611 | } else { |
612 | pg = NULL; |
613 | } |
614 | |
615 | /* |
616 | * out of memory resources? |
617 | */ |
618 | |
619 | if (pg == NULL) { |
620 | /* save anon for the next try. */ |
621 | if (anon != NULL) { |
622 | *spare = anon; |
623 | } |
624 | |
625 | /* unlock and fail ... */ |
626 | uvm_page_unbusy(&uobjpage, 1); |
627 | uvmfault_unlockall(ufi, amap, uobj); |
628 | if (!uvm_reclaimable()) { |
629 | UVMHIST_LOG(maphist, "out of VM" , 0,0,0,0); |
630 | uvmexp.fltnoanon++; |
631 | error = ENOMEM; |
632 | goto done; |
633 | } |
634 | |
635 | UVMHIST_LOG(maphist, "out of RAM, waiting for more" , 0,0,0,0); |
636 | uvmexp.fltnoram++; |
637 | uvm_wait("flt_noram5" ); |
638 | error = ERESTART; |
639 | goto done; |
640 | } |
641 | |
642 | /* copy page [pg now dirty] */ |
643 | if (opg) { |
644 | uvm_pagecopy(opg, pg); |
645 | } |
646 | |
647 | amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon, |
648 | oanon != NULL); |
649 | |
650 | *nanon = anon; |
651 | error = 0; |
652 | done: |
653 | return error; |
654 | } |
655 | |
656 | |
657 | /* |
658 | * F A U L T - m a i n e n t r y p o i n t |
659 | */ |
660 | |
661 | /* |
662 | * uvm_fault: page fault handler |
663 | * |
664 | * => called from MD code to resolve a page fault |
665 | * => VM data structures usually should be unlocked. however, it is |
666 | * possible to call here with the main map locked if the caller |
 *	gets a write lock, sets it recursive, and then calls us (c.f.
 *	uvm_map_pageable).   this should be avoided because it keeps
 *	the map locked during I/O.
670 | * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT |
671 | */ |
672 | |
673 | #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ |
674 | ~VM_PROT_WRITE : VM_PROT_ALL) |
675 | |
676 | /* fault_flag values passed from uvm_fault_wire to uvm_fault_internal */ |
677 | #define UVM_FAULT_WIRE (1 << 0) |
678 | #define UVM_FAULT_MAXPROT (1 << 1) |
679 | |
680 | struct uvm_faultctx { |
681 | |
682 | /* |
683 | * the following members are set up by uvm_fault_check() and |
684 | * read-only after that. |
685 | * |
686 | * note that narrow is used by uvm_fault_check() to change |
687 | * the behaviour after ERESTART. |
688 | * |
	 * most of them might change after ERESTART if the underlying
	 * map entry has been changed behind us.  an exception is
	 * wire_paging, which never changes.
692 | */ |
693 | vm_prot_t access_type; |
694 | vaddr_t startva; |
695 | int npages; |
696 | int centeridx; |
697 | bool narrow; /* work on a single requested page only */ |
698 | bool wire_mapping; /* request a PMAP_WIRED mapping |
699 | (UVM_FAULT_WIRE or VM_MAPENT_ISWIRED) */ |
700 | bool wire_paging; /* request uvm_pagewire |
701 | (true for UVM_FAULT_WIRE) */ |
702 | bool cow_now; /* VM_PROT_WRITE is actually requested |
703 | (ie. should break COW and page loaning) */ |
704 | |
705 | /* |
706 | * enter_prot is set up by uvm_fault_check() and clamped |
707 | * (ie. drop the VM_PROT_WRITE bit) in various places in case |
708 | * of !cow_now. |
709 | */ |
710 | vm_prot_t enter_prot; /* prot at which we want to enter pages in */ |
711 | |
712 | /* |
713 | * the following member is for uvmfault_promote() and ERESTART. |
714 | */ |
715 | struct vm_anon *anon_spare; |
716 | |
717 | /* |
	 * the following is actually a uvm_fault_lower() internal.
719 | * it's here merely for debugging. |
720 | * (or due to the mechanical separation of the function?) |
721 | */ |
722 | bool promote; |
723 | }; |
724 | |
725 | static inline int uvm_fault_check( |
726 | struct uvm_faultinfo *, struct uvm_faultctx *, |
727 | struct vm_anon ***, bool); |
728 | |
729 | static int uvm_fault_upper( |
730 | struct uvm_faultinfo *, struct uvm_faultctx *, |
731 | struct vm_anon **); |
732 | static inline int uvm_fault_upper_lookup( |
733 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
734 | struct vm_anon **, struct vm_page **); |
735 | static inline void uvm_fault_upper_neighbor( |
736 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
737 | vaddr_t, struct vm_page *, bool); |
738 | static inline int uvm_fault_upper_loan( |
739 | struct uvm_faultinfo *, struct uvm_faultctx *, |
740 | struct vm_anon *, struct uvm_object **); |
741 | static inline int uvm_fault_upper_promote( |
742 | struct uvm_faultinfo *, struct uvm_faultctx *, |
743 | struct uvm_object *, struct vm_anon *); |
744 | static inline int uvm_fault_upper_direct( |
745 | struct uvm_faultinfo *, struct uvm_faultctx *, |
746 | struct uvm_object *, struct vm_anon *); |
747 | static int uvm_fault_upper_enter( |
748 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
749 | struct uvm_object *, struct vm_anon *, |
750 | struct vm_page *, struct vm_anon *); |
751 | static inline void uvm_fault_upper_done( |
752 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
753 | struct vm_anon *, struct vm_page *); |
754 | |
755 | static int uvm_fault_lower( |
756 | struct uvm_faultinfo *, struct uvm_faultctx *, |
757 | struct vm_page **); |
758 | static inline void uvm_fault_lower_lookup( |
759 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
760 | struct vm_page **); |
761 | static inline void uvm_fault_lower_neighbor( |
762 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
763 | vaddr_t, struct vm_page *, bool); |
764 | static inline int uvm_fault_lower_io( |
765 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
766 | struct uvm_object **, struct vm_page **); |
767 | static inline int uvm_fault_lower_direct( |
768 | struct uvm_faultinfo *, struct uvm_faultctx *, |
769 | struct uvm_object *, struct vm_page *); |
770 | static inline int uvm_fault_lower_direct_loan( |
771 | struct uvm_faultinfo *, struct uvm_faultctx *, |
772 | struct uvm_object *, struct vm_page **, |
773 | struct vm_page **); |
774 | static inline int uvm_fault_lower_promote( |
775 | struct uvm_faultinfo *, struct uvm_faultctx *, |
776 | struct uvm_object *, struct vm_page *); |
777 | static int uvm_fault_lower_enter( |
778 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
779 | struct uvm_object *, |
780 | struct vm_anon *, struct vm_page *); |
781 | static inline void uvm_fault_lower_done( |
782 | struct uvm_faultinfo *, const struct uvm_faultctx *, |
783 | struct uvm_object *, struct vm_page *); |
784 | |
785 | int |
786 | uvm_fault_internal(struct vm_map *orig_map, vaddr_t vaddr, |
787 | vm_prot_t access_type, int fault_flag) |
788 | { |
789 | struct cpu_data *cd; |
790 | struct uvm_cpu *ucpu; |
791 | struct uvm_faultinfo ufi; |
792 | struct uvm_faultctx flt = { |
793 | .access_type = access_type, |
794 | |
		/* don't look for neighborhood pages on "wire" fault */
796 | .narrow = (fault_flag & UVM_FAULT_WIRE) != 0, |
797 | |
798 | /* "wire" fault causes wiring of both mapping and paging */ |
799 | .wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0, |
800 | .wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0, |
801 | }; |
802 | const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0; |
803 | struct vm_anon *anons_store[UVM_MAXRANGE], **anons; |
804 | struct vm_page *pages_store[UVM_MAXRANGE], **pages; |
805 | int error; |
806 | |
807 | UVMHIST_FUNC("uvm_fault" ); UVMHIST_CALLED(maphist); |
808 | |
809 | UVMHIST_LOG(maphist, "(map=%p, vaddr=%#lx, at=%d, ff=%d)" , |
810 | orig_map, vaddr, access_type, fault_flag); |
811 | |
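	/* count this fault on the current CPU. */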
812 | cd = &(curcpu()->ci_data); |
813 | cd->cpu_nfault++; |
814 | ucpu = cd->cpu_uvm; |
815 | |
816 | /* Don't flood RNG subsystem with samples. */ |
817 | if (cd->cpu_nfault % 503) |
818 | goto norng; |
819 | |
820 | /* Don't count anything until user interaction is possible */ |
821 | if (__predict_true(start_init_exec)) { |
822 | kpreempt_disable(); |
823 | rnd_add_uint32(&ucpu->rs, |
824 | sizeof(vaddr_t) == sizeof(uint32_t) ? |
825 | (uint32_t)vaddr : sizeof(vaddr_t) == |
826 | sizeof(uint64_t) ? |
827 | (uint32_t)(vaddr & 0x00000000ffffffff) : |
828 | (uint32_t)(cd->cpu_nfault & 0x00000000ffffffff)); |
829 | kpreempt_enable(); |
830 | } |
831 | norng: |
832 | /* |
833 | * init the IN parameters in the ufi |
834 | */ |
835 | |
836 | ufi.orig_map = orig_map; |
837 | ufi.orig_rvaddr = trunc_page(vaddr); |
838 | ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */ |
839 | |
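	/*
	 * the fault handlers below return ERESTART when the fault
	 * needs to be retried from the top (ReFault).
	 */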
840 | error = ERESTART; |
841 | while (error == ERESTART) { /* ReFault: */ |
842 | anons = anons_store; |
843 | pages = pages_store; |
844 | |
845 | error = uvm_fault_check(&ufi, &flt, &anons, maxprot); |
846 | if (error != 0) |
847 | continue; |
848 | |
849 | error = uvm_fault_upper_lookup(&ufi, &flt, anons, pages); |
850 | if (error != 0) |
851 | continue; |
852 | |
853 | if (pages[flt.centeridx] == PGO_DONTCARE) |
854 | error = uvm_fault_upper(&ufi, &flt, anons); |
855 | else { |
856 | struct uvm_object * const uobj = |
857 | ufi.entry->object.uvm_obj; |
858 | |
859 | if (uobj && uobj->pgops->pgo_fault != NULL) { |
860 | /* |
861 | * invoke "special" fault routine. |
862 | */ |
863 | mutex_enter(uobj->vmobjlock); |
864 | /* locked: maps(read), amap(if there), uobj */ |
865 | error = uobj->pgops->pgo_fault(&ufi, |
866 | flt.startva, pages, flt.npages, |
867 | flt.centeridx, flt.access_type, |
868 | PGO_LOCKED|PGO_SYNCIO); |
869 | |
870 | /* |
871 | * locked: nothing, pgo_fault has unlocked |
872 | * everything |
873 | */ |
874 | |
875 | /* |
876 | * object fault routine responsible for |
877 | * pmap_update(). |
878 | */ |
879 | } else { |
880 | error = uvm_fault_lower(&ufi, &flt, pages); |
881 | } |
882 | } |
883 | } |
884 | |
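	/* release the spare anon saved by uvmfault_promote(), if any. */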
885 | if (flt.anon_spare != NULL) { |
886 | flt.anon_spare->an_ref--; |
887 | KASSERT(flt.anon_spare->an_ref == 0); |
888 | KASSERT(flt.anon_spare->an_lock == NULL); |
889 | uvm_anon_free(flt.anon_spare); |
890 | } |
891 | return error; |
892 | } |
893 | |
894 | /* |
895 | * uvm_fault_check: check prot, handle needs-copy, etc. |
896 | * |
897 | * 1. lookup entry. |
898 | * 2. check protection. |
899 | * 3. adjust fault condition (mainly for simulated fault). |
900 | * 4. handle needs-copy (lazy amap copy). |
901 | * 5. establish range of interest for neighbor fault (aka pre-fault). |
902 | * 6. look up anons (if amap exists). |
903 | * 7. flush pages (if MADV_SEQUENTIAL) |
904 | * |
905 | * => called with nothing locked. |
906 | * => if we fail (result != 0) we unlock everything. |
907 | * => initialize/adjust many members of flt. |
908 | */ |
909 | |
910 | static int |
911 | uvm_fault_check( |
912 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
913 | struct vm_anon ***ranons, bool maxprot) |
914 | { |
915 | struct vm_amap *amap; |
916 | struct uvm_object *uobj; |
917 | vm_prot_t check_prot; |
918 | int nback, nforw; |
919 | UVMHIST_FUNC("uvm_fault_check" ); UVMHIST_CALLED(maphist); |
920 | |
921 | /* |
922 | * lookup and lock the maps |
923 | */ |
924 | |
925 | if (uvmfault_lookup(ufi, false) == false) { |
926 | UVMHIST_LOG(maphist, "<- no mapping @ 0x%x" , ufi->orig_rvaddr, |
927 | 0,0,0); |
928 | return EFAULT; |
929 | } |
930 | /* locked: maps(read) */ |
931 | |
932 | #ifdef DIAGNOSTIC |
933 | if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0) { |
934 | printf("Page fault on non-pageable map:\n" ); |
935 | printf("ufi->map = %p\n" , ufi->map); |
936 | printf("ufi->orig_map = %p\n" , ufi->orig_map); |
937 | printf("ufi->orig_rvaddr = 0x%lx\n" , (u_long) ufi->orig_rvaddr); |
938 | panic("uvm_fault: (ufi->map->flags & VM_MAP_PAGEABLE) == 0" ); |
939 | } |
940 | #endif |
941 | |
942 | /* |
943 | * check protection |
944 | */ |
945 | |
946 | check_prot = maxprot ? |
947 | ufi->entry->max_protection : ufi->entry->protection; |
948 | if ((check_prot & flt->access_type) != flt->access_type) { |
949 | UVMHIST_LOG(maphist, |
950 | "<- protection failure (prot=%#x, access=%#x)" , |
951 | ufi->entry->protection, flt->access_type, 0, 0); |
952 | uvmfault_unlockmaps(ufi, false); |
953 | return EACCES; |
954 | } |
955 | |
956 | /* |
957 | * "enter_prot" is the protection we want to enter the page in at. |
958 | * for certain pages (e.g. copy-on-write pages) this protection can |
959 | * be more strict than ufi->entry->protection. "wired" means either |
960 | * the entry is wired or we are fault-wiring the pg. |
961 | */ |
962 | |
963 | flt->enter_prot = ufi->entry->protection; |
964 | if (VM_MAPENT_ISWIRED(ufi->entry)) |
965 | flt->wire_mapping = true; |
966 | |
967 | if (flt->wire_mapping) { |
968 | flt->access_type = flt->enter_prot; /* full access for wired */ |
969 | flt->cow_now = (check_prot & VM_PROT_WRITE) != 0; |
970 | } else { |
971 | flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0; |
972 | } |
973 | |
974 | flt->promote = false; |
975 | |
976 | /* |
977 | * handle "needs_copy" case. if we need to copy the amap we will |
978 | * have to drop our readlock and relock it with a write lock. (we |
979 | * need a write lock to change anything in a map entry [e.g. |
980 | * needs_copy]). |
981 | */ |
982 | |
983 | if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { |
984 | if (flt->cow_now || (ufi->entry->object.uvm_obj == NULL)) { |
985 | KASSERT(!maxprot); |
986 | /* need to clear */ |
987 | UVMHIST_LOG(maphist, |
988 | " need to clear needs_copy and refault" ,0,0,0,0); |
989 | uvmfault_unlockmaps(ufi, false); |
990 | uvmfault_amapcopy(ufi); |
991 | uvmexp.fltamcopy++; |
992 | return ERESTART; |
993 | |
994 | } else { |
995 | |
996 | /* |
997 | * ensure that we pmap_enter page R/O since |
998 | * needs_copy is still true |
999 | */ |
1000 | |
1001 | flt->enter_prot &= ~VM_PROT_WRITE; |
1002 | } |
1003 | } |
1004 | |
1005 | /* |
1006 | * identify the players |
1007 | */ |
1008 | |
1009 | amap = ufi->entry->aref.ar_amap; /* upper layer */ |
1010 | uobj = ufi->entry->object.uvm_obj; /* lower layer */ |
1011 | |
1012 | /* |
1013 | * check for a case 0 fault. if nothing backing the entry then |
1014 | * error now. |
1015 | */ |
1016 | |
1017 | if (amap == NULL && uobj == NULL) { |
1018 | uvmfault_unlockmaps(ufi, false); |
1019 | UVMHIST_LOG(maphist,"<- no backing store, no overlay" ,0,0,0,0); |
1020 | return EFAULT; |
1021 | } |
1022 | |
1023 | /* |
1024 | * establish range of interest based on advice from mapper |
1025 | * and then clip to fit map entry. note that we only want |
1026 | * to do this the first time through the fault. if we |
1027 | * ReFault we will disable this by setting "narrow" to true. |
1028 | */ |
1029 | |
1030 | if (flt->narrow == false) { |
1031 | |
1032 | /* wide fault (!narrow) */ |
1033 | KASSERT(uvmadvice[ufi->entry->advice].advice == |
1034 | ufi->entry->advice); |
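		/* clip nback so that startva cannot fall below the entry's start */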
1035 | nback = MIN(uvmadvice[ufi->entry->advice].nback, |
1036 | (ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT); |
1037 | flt->startva = ufi->orig_rvaddr - (nback << PAGE_SHIFT); |
1038 | /* |
1039 | * note: "-1" because we don't want to count the |
1040 | * faulting page as forw |
1041 | */ |
1042 | nforw = MIN(uvmadvice[ufi->entry->advice].nforw, |
1043 | ((ufi->entry->end - ufi->orig_rvaddr) >> |
1044 | PAGE_SHIFT) - 1); |
1045 | flt->npages = nback + nforw + 1; |
1046 | flt->centeridx = nback; |
1047 | |
1048 | flt->narrow = true; /* ensure only once per-fault */ |
1049 | |
1050 | } else { |
1051 | |
1052 | /* narrow fault! */ |
1053 | nback = nforw = 0; |
1054 | flt->startva = ufi->orig_rvaddr; |
1055 | flt->npages = 1; |
1056 | flt->centeridx = 0; |
1057 | |
1058 | } |
1059 | /* offset from entry's start to pgs' start */ |
1060 | const voff_t eoff = flt->startva - ufi->entry->start; |
1061 | |
1062 | /* locked: maps(read) */ |
1063 | UVMHIST_LOG(maphist, " narrow=%d, back=%d, forw=%d, startva=%#lx" , |
1064 | flt->narrow, nback, nforw, flt->startva); |
1065 | UVMHIST_LOG(maphist, " entry=%p, amap=%p, obj=%p" , ufi->entry, |
1066 | amap, uobj, 0); |
1067 | |
1068 | /* |
1069 | * if we've got an amap, lock it and extract current anons. |
1070 | */ |
1071 | |
1072 | if (amap) { |
1073 | amap_lock(amap); |
1074 | amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages); |
1075 | } else { |
1076 | *ranons = NULL; /* to be safe */ |
1077 | } |
1078 | |
1079 | /* locked: maps(read), amap(if there) */ |
1080 | KASSERT(amap == NULL || mutex_owned(amap->am_lock)); |
1081 | |
1082 | /* |
1083 | * for MADV_SEQUENTIAL mappings we want to deactivate the back pages |
1084 | * now and then forget about them (for the rest of the fault). |
1085 | */ |
1086 | |
1087 | if (ufi->entry->advice == MADV_SEQUENTIAL && nback != 0) { |
1088 | |
1089 | UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages" , |
1090 | 0,0,0,0); |
1091 | /* flush back-page anons? */ |
1092 | if (amap) |
1093 | uvmfault_anonflush(*ranons, nback); |
1094 | |
1095 | /* flush object? */ |
1096 | if (uobj) { |
1097 | voff_t uoff; |
1098 | |
1099 | uoff = ufi->entry->offset + eoff; |
1100 | mutex_enter(uobj->vmobjlock); |
1101 | (void) (uobj->pgops->pgo_put)(uobj, uoff, uoff + |
1102 | (nback << PAGE_SHIFT), PGO_DEACTIVATE); |
1103 | } |
1104 | |
1105 | /* now forget about the backpages */ |
1106 | if (amap) |
1107 | *ranons += nback; |
1108 | flt->startva += (nback << PAGE_SHIFT); |
1109 | flt->npages -= nback; |
1110 | flt->centeridx = 0; |
1111 | } |
1112 | /* |
1113 | * => startva is fixed |
1114 | * => npages is fixed |
1115 | */ |
1116 | KASSERT(flt->startva <= ufi->orig_rvaddr); |
1117 | KASSERT(ufi->orig_rvaddr + ufi->orig_size <= |
1118 | flt->startva + (flt->npages << PAGE_SHIFT)); |
1119 | return 0; |
1120 | } |
1121 | |
1122 | /* |
1123 | * uvm_fault_upper_lookup: look up existing h/w mapping and amap. |
1124 | * |
1125 | * iterate range of interest: |
1126 | * 1. check if h/w mapping exists. if yes, we don't care |
1127 | * 2. check if anon exists. if not, page is lower. |
1128 | * 3. if anon exists, enter h/w mapping for neighbors. |
1129 | * |
1130 | * => called with amap locked (if exists). |
1131 | */ |
1132 | |
1133 | static int |
1134 | uvm_fault_upper_lookup( |
1135 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
1136 | struct vm_anon **anons, struct vm_page **pages) |
1137 | { |
1138 | struct vm_amap *amap = ufi->entry->aref.ar_amap; |
1139 | int lcv; |
1140 | vaddr_t currva; |
1141 | bool shadowed __unused; |
1142 | UVMHIST_FUNC("uvm_fault_upper_lookup" ); UVMHIST_CALLED(maphist); |
1143 | |
1144 | /* locked: maps(read), amap(if there) */ |
1145 | KASSERT(amap == NULL || mutex_owned(amap->am_lock)); |
1146 | |
1147 | /* |
1148 | * map in the backpages and frontpages we found in the amap in hopes |
1149 | * of preventing future faults. we also init the pages[] array as |
1150 | * we go. |
1151 | */ |
1152 | |
1153 | currva = flt->startva; |
1154 | shadowed = false; |
1155 | for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { |
1156 | /* |
1157 | * don't play with VAs that are already mapped |
1158 | * (except for center) |
1159 | */ |
1160 | if (lcv != flt->centeridx && |
1161 | pmap_extract(ufi->orig_map->pmap, currva, NULL)) { |
1162 | pages[lcv] = PGO_DONTCARE; |
1163 | continue; |
1164 | } |
1165 | |
1166 | /* |
1167 | * unmapped or center page. check if any anon at this level. |
1168 | */ |
1169 | if (amap == NULL || anons[lcv] == NULL) { |
1170 | pages[lcv] = NULL; |
1171 | continue; |
1172 | } |
1173 | |
1174 | /* |
1175 | * check for present page and map if possible. re-activate it. |
1176 | */ |
1177 | |
1178 | pages[lcv] = PGO_DONTCARE; |
1179 | if (lcv == flt->centeridx) { /* save center for later! */ |
1180 | shadowed = true; |
1181 | continue; |
1182 | } |
1183 | |
1184 | struct vm_anon *anon = anons[lcv]; |
1185 | struct vm_page *pg = anon->an_page; |
1186 | |
1187 | KASSERT(anon->an_lock == amap->am_lock); |
1188 | |
1189 | /* Ignore loaned and busy pages. */ |
1190 | if (pg && pg->loan_count == 0 && (pg->flags & PG_BUSY) == 0) { |
1191 | uvm_fault_upper_neighbor(ufi, flt, currva, |
1192 | pg, anon->an_ref > 1); |
1193 | } |
1194 | } |
1195 | |
1196 | /* locked: maps(read), amap(if there) */ |
1197 | KASSERT(amap == NULL || mutex_owned(amap->am_lock)); |
1198 | /* (shadowed == true) if there is an anon at the faulting address */ |
1199 | UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d" , shadowed, |
1200 | (ufi->entry->object.uvm_obj && shadowed != false),0,0); |
1201 | |
1202 | /* |
1203 | * note that if we are really short of RAM we could sleep in the above |
1204 | * call to pmap_enter with everything locked. bad? |
1205 | * |
1206 | * XXX Actually, that is bad; pmap_enter() should just fail in that |
1207 | * XXX case. --thorpej |
1208 | */ |
1209 | |
1210 | return 0; |
1211 | } |
1212 | |
1213 | /* |
1214 | * uvm_fault_upper_neighbor: enter single lower neighbor page. |
1215 | * |
1216 | * => called with amap and anon locked. |
1217 | */ |
1218 | |
1219 | static void |
1220 | uvm_fault_upper_neighbor( |
1221 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
1222 | vaddr_t currva, struct vm_page *pg, bool readonly) |
1223 | { |
1224 | UVMHIST_FUNC("uvm_fault_upper_neighbor" ); UVMHIST_CALLED(maphist); |
1225 | |
1226 | /* locked: amap, anon */ |
1227 | |
1228 | mutex_enter(&uvm_pageqlock); |
1229 | uvm_pageenqueue(pg); |
1230 | mutex_exit(&uvm_pageqlock); |
1231 | UVMHIST_LOG(maphist, |
1232 | " MAPPING: n anon: pm=%p, va=%#lx, pg=%p" , |
1233 | ufi->orig_map->pmap, currva, pg, 0); |
1234 | uvmexp.fltnamap++; |
1235 | |
1236 | /* |
1237 | * Since this page isn't the page that's actually faulting, |
1238 | * ignore pmap_enter() failures; it's not critical that we |
1239 | * enter these right now. |
1240 | */ |
1241 | |
1242 | (void) pmap_enter(ufi->orig_map->pmap, currva, |
1243 | VM_PAGE_TO_PHYS(pg), |
1244 | readonly ? (flt->enter_prot & ~VM_PROT_WRITE) : |
1245 | flt->enter_prot, |
1246 | PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0)); |
1247 | |
1248 | pmap_update(ufi->orig_map->pmap); |
1249 | } |
1250 | |
1251 | /* |
1252 | * uvm_fault_upper: handle upper fault. |
1253 | * |
1254 | * 1. acquire anon lock. |
1255 | * 2. get anon. let uvmfault_anonget do the dirty work. |
1256 | * 3. handle loan. |
1257 | * 4. dispatch direct or promote handlers. |
1258 | */ |
1259 | |
1260 | static int |
1261 | uvm_fault_upper( |
1262 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
1263 | struct vm_anon **anons) |
1264 | { |
1265 | struct vm_amap * const amap = ufi->entry->aref.ar_amap; |
1266 | struct vm_anon * const anon = anons[flt->centeridx]; |
1267 | struct uvm_object *uobj; |
1268 | int error; |
1269 | UVMHIST_FUNC("uvm_fault_upper" ); UVMHIST_CALLED(maphist); |
1270 | |
1271 | /* locked: maps(read), amap, anon */ |
1272 | KASSERT(mutex_owned(amap->am_lock)); |
1273 | KASSERT(anon->an_lock == amap->am_lock); |
1274 | |
1275 | /* |
1276 | * handle case 1: fault on an anon in our amap |
1277 | */ |
1278 | |
1279 | UVMHIST_LOG(maphist, " case 1 fault: anon=%p" , anon, 0,0,0); |
1280 | |
1281 | /* |
1282 | * no matter if we have case 1A or case 1B we are going to need to |
1283 | * have the anon's memory resident. ensure that now. |
1284 | */ |
1285 | |
1286 | /* |
1287 | * let uvmfault_anonget do the dirty work. |
1288 | * if it fails (!OK) it will unlock everything for us. |
1289 | * if it succeeds, locks are still valid and locked. |
1290 | * also, if it is OK, then the anon's page is on the queues. |
1291 | * if the page is on loan from a uvm_object, then anonget will |
1292 | * lock that object for us if it does not fail. |
1293 | */ |
1294 | |
1295 | error = uvmfault_anonget(ufi, amap, anon); |
1296 | switch (error) { |
1297 | case 0: |
1298 | break; |
1299 | |
1300 | case ERESTART: |
1301 | return ERESTART; |
1302 | |
1303 | case EAGAIN: |
1304 | kpause("fltagain1" , false, hz/2, NULL); |
1305 | return ERESTART; |
1306 | |
1307 | default: |
1308 | return error; |
1309 | } |
1310 | |
1311 | /* |
1312 | * uobj is non null if the page is on loan from an object (i.e. uobj) |
1313 | */ |
1314 | |
1315 | uobj = anon->an_page->uobject; /* locked by anonget if !NULL */ |
1316 | |
1317 | /* locked: maps(read), amap, anon, uobj(if one) */ |
1318 | KASSERT(mutex_owned(amap->am_lock)); |
1319 | KASSERT(anon->an_lock == amap->am_lock); |
1320 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
1321 | |
1322 | /* |
1323 | * special handling for loaned pages |
1324 | */ |
1325 | |
1326 | if (anon->an_page->loan_count) { |
1327 | error = uvm_fault_upper_loan(ufi, flt, anon, &uobj); |
1328 | if (error != 0) |
1329 | return error; |
1330 | } |
1331 | |
1332 | /* |
1333 | * if we are case 1B then we will need to allocate a new blank |
1334 | * anon to transfer the data into. note that we have a lock |
1335 | * on anon, so no one can busy or release the page until we are done. |
1336 | * also note that the ref count can't drop to zero here because |
1337 | * it is > 1 and we are only dropping one ref. |
1338 | * |
1339 | * in the (hopefully very rare) case that we are out of RAM we |
1340 | * will unlock, wait for more RAM, and refault. |
1341 | * |
1342 | * if we are out of anon VM we kill the process (XXX: could wait?). |
1343 | */ |
1344 | |
1345 | if (flt->cow_now && anon->an_ref > 1) { |
1346 | flt->promote = true; |
1347 | error = uvm_fault_upper_promote(ufi, flt, uobj, anon); |
1348 | } else { |
1349 | error = uvm_fault_upper_direct(ufi, flt, uobj, anon); |
1350 | } |
1351 | return error; |
1352 | } |
1353 | |
1354 | /* |
1355 | * uvm_fault_upper_loan: handle loaned upper page. |
1356 | * |
1357 | * 1. if not cow'ing now, simply adjust flt->enter_prot. |
1358 | * 2. if cow'ing now, and if ref count is 1, break loan. |
1359 | */ |
1360 | |
1361 | static int |
1362 | uvm_fault_upper_loan( |
1363 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
1364 | struct vm_anon *anon, struct uvm_object **ruobj) |
1365 | { |
1366 | struct vm_amap * const amap = ufi->entry->aref.ar_amap; |
1367 | int error = 0; |
1368 | UVMHIST_FUNC("uvm_fault_upper_loan" ); UVMHIST_CALLED(maphist); |
1369 | |
1370 | if (!flt->cow_now) { |
1371 | |
1372 | /* |
1373 | * for read faults on loaned pages we just cap the |
1374 | * protection at read-only. |
1375 | */ |
1376 | |
1377 | flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; |
1378 | |
1379 | } else { |
1380 | /* |
1381 | * note that we can't allow writes into a loaned page! |
1382 | * |
1383 | * if we have a write fault on a loaned page in an |
1384 | * anon then we need to look at the anon's ref count. |
1385 | * if it is greater than one then we are going to do |
1386 | * a normal copy-on-write fault into a new anon (this |
1387 | * is not a problem). however, if the reference count |
1388 | * is one (a case where we would normally allow a |
1389 | * write directly to the page) then we need to kill |
1390 | * the loan before we continue. |
1391 | */ |
1392 | |
1393 | /* >1 case is already ok */ |
1394 | if (anon->an_ref == 1) { |
1395 | error = uvm_loanbreak_anon(anon, *ruobj); |
1396 | if (error != 0) { |
1397 | uvmfault_unlockall(ufi, amap, *ruobj); |
1398 | uvm_wait("flt_noram2" ); |
1399 | return ERESTART; |
1400 | } |
			/* if we were a loan receiver, uobj is gone */
1402 | if (*ruobj) |
1403 | *ruobj = NULL; |
1404 | } |
1405 | } |
1406 | return error; |
1407 | } |
1408 | |
1409 | /* |
1410 | * uvm_fault_upper_promote: promote upper page. |
1411 | * |
1412 | * 1. call uvmfault_promote. |
1413 | * 2. enqueue page. |
1414 | * 3. deref. |
1415 | * 4. pass page to uvm_fault_upper_enter. |
1416 | */ |
1417 | |
1418 | static int |
1419 | uvm_fault_upper_promote( |
1420 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
1421 | struct uvm_object *uobj, struct vm_anon *anon) |
1422 | { |
1423 | struct vm_anon * const oanon = anon; |
1424 | struct vm_page *pg; |
1425 | int error; |
1426 | UVMHIST_FUNC("uvm_fault_upper_promote" ); UVMHIST_CALLED(maphist); |
1427 | |
1428 | UVMHIST_LOG(maphist, " case 1B: COW fault" ,0,0,0,0); |
1429 | uvmexp.flt_acow++; |
1430 | |
1431 | error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon, |
1432 | &flt->anon_spare); |
1433 | switch (error) { |
1434 | case 0: |
1435 | break; |
1436 | case ERESTART: |
1437 | return ERESTART; |
1438 | default: |
1439 | return error; |
1440 | } |
1441 | |
1442 | KASSERT(anon == NULL || anon->an_lock == oanon->an_lock); |
1443 | |
1444 | pg = anon->an_page; |
1445 | mutex_enter(&uvm_pageqlock); |
1446 | uvm_pageenqueue(pg); /* uvm_fault_upper_done will activate the page */ |
1447 | mutex_exit(&uvm_pageqlock); |
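	/* the new page holds valid data now: clear busy/fake and disown it. */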
1448 | pg->flags &= ~(PG_BUSY|PG_FAKE); |
1449 | UVM_PAGE_OWN(pg, NULL); |
1450 | |
1451 | /* deref: can not drop to zero here by defn! */ |
1452 | KASSERT(oanon->an_ref > 1); |
1453 | oanon->an_ref--; |
1454 | |
1455 | /* |
1456 | * note: oanon is still locked, as is the new anon. we |
1457 | * need to check for this later when we unlock oanon; if |
1458 | * oanon != anon, we'll have to unlock anon, too. |
1459 | */ |
1460 | |
1461 | return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon); |
1462 | } |
1463 | |
1464 | /* |
1465 | * uvm_fault_upper_direct: handle direct fault. |
1466 | */ |
1467 | |
1468 | static int |
1469 | uvm_fault_upper_direct( |
1470 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
1471 | struct uvm_object *uobj, struct vm_anon *anon) |
1472 | { |
1473 | struct vm_anon * const oanon = anon; |
1474 | struct vm_page *pg; |
1475 | UVMHIST_FUNC("uvm_fault_upper_direct" ); UVMHIST_CALLED(maphist); |
1476 | |
1477 | uvmexp.flt_anon++; |
1478 | pg = anon->an_page; |
1479 | if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ |
1480 | flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; |
1481 | |
1482 | return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon); |
1483 | } |
1484 | |
1485 | /* |
1486 | * uvm_fault_upper_enter: enter h/w mapping of upper page. |
1487 | */ |
1488 | |
1489 | static int |
1490 | uvm_fault_upper_enter( |
1491 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
1492 | struct uvm_object *uobj, struct vm_anon *anon, struct vm_page *pg, |
1493 | struct vm_anon *oanon) |
1494 | { |
1495 | struct vm_amap * const amap = ufi->entry->aref.ar_amap; |
1496 | UVMHIST_FUNC("uvm_fault_upper_enter" ); UVMHIST_CALLED(maphist); |
1497 | |
1498 | /* locked: maps(read), amap, oanon, anon(if different from oanon) */ |
1499 | KASSERT(mutex_owned(amap->am_lock)); |
1500 | KASSERT(anon->an_lock == amap->am_lock); |
1501 | KASSERT(oanon->an_lock == amap->am_lock); |
1502 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
1503 | |
1504 | /* |
1505 | * now map the page in. |
1506 | */ |
1507 | |
1508 | UVMHIST_LOG(maphist, |
1509 | " MAPPING: anon: pm=%p, va=%#lx, pg=%p, promote=%d" , |
1510 | ufi->orig_map->pmap, ufi->orig_rvaddr, pg, flt->promote); |
1511 | if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, |
1512 | VM_PAGE_TO_PHYS(pg), |
1513 | flt->enter_prot, flt->access_type | PMAP_CANFAIL | |
1514 | (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) { |
1515 | |
1516 | /* |
1517 | * No need to undo what we did; we can simply think of |
1518 | * this as the pmap throwing away the mapping information. |
1519 | * |
1520 | * We do, however, have to go through the ReFault path, |
1521 | * as the map may change while we're asleep. |
1522 | */ |
1523 | |
1524 | uvmfault_unlockall(ufi, amap, uobj); |
1525 | if (!uvm_reclaimable()) { |
1526 | UVMHIST_LOG(maphist, |
1527 | "<- failed. out of VM" ,0,0,0,0); |
1528 | /* XXX instrumentation */ |
1529 | return ENOMEM; |
1530 | } |
1531 | /* XXX instrumentation */ |
1532 | uvm_wait("flt_pmfail1" ); |
1533 | return ERESTART; |
1534 | } |
1535 | |
1536 | uvm_fault_upper_done(ufi, flt, anon, pg); |
1537 | |
1538 | /* |
1539 | * done case 1! finish up by unlocking everything and returning success |
1540 | */ |
1541 | |
1542 | pmap_update(ufi->orig_map->pmap); |
1543 | uvmfault_unlockall(ufi, amap, uobj); |
1544 | return 0; |
1545 | } |
1546 | |
1547 | /* |
1548 | * uvm_fault_upper_done: queue upper center page. |
1549 | */ |
1550 | |
1551 | static void |
1552 | uvm_fault_upper_done( |
1553 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
1554 | struct vm_anon *anon, struct vm_page *pg) |
1555 | { |
1556 | const bool wire_paging = flt->wire_paging; |
1557 | |
1558 | UVMHIST_FUNC("uvm_fault_upper_done" ); UVMHIST_CALLED(maphist); |
1559 | |
1560 | /* |
1561 | * ... update the page queues. |
1562 | */ |
1563 | |
1564 | mutex_enter(&uvm_pageqlock); |
1565 | if (wire_paging) { |
1566 | uvm_pagewire(pg); |
1567 | |
1568 | /* |
1569 | * since the now-wired page cannot be paged out, |
1570 | * release its swap resources for others to use. |
1571 | * since an anon with no swap cannot be PG_CLEAN, |
1572 | * clear its clean flag now. |
1573 | */ |
1574 | |
1575 | pg->flags &= ~(PG_CLEAN); |
1576 | |
1577 | } else { |
1578 | uvm_pageactivate(pg); |
1579 | } |
1580 | mutex_exit(&uvm_pageqlock); |
1581 | |
1582 | if (wire_paging) { |
1583 | uvm_anon_dropswap(anon); |
1584 | } |
1585 | } |
1586 | |
1587 | /* |
1588 | * uvm_fault_lower: handle lower fault. |
1589 | * |
1590 | * 1. check uobj |
1591 | * 1.1. if null, ZFOD. |
 * 1.2. if not null, look up unmapped neighbor pages.
1593 | * 2. for center page, check if promote. |
1594 | * 2.1. ZFOD always needs promotion. |
1595 | * 2.2. other uobjs, when entry is marked COW (usually MAP_PRIVATE vnode). |
1596 | * 3. if uobj is not ZFOD and page is not found, do i/o. |
1597 | * 4. dispatch either direct / promote fault. |
1598 | */ |
1599 | |
1600 | static int |
1601 | uvm_fault_lower( |
1602 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
1603 | struct vm_page **pages) |
1604 | { |
1605 | #ifdef DIAGNOSTIC |
1606 | struct vm_amap *amap = ufi->entry->aref.ar_amap; |
1607 | #endif |
1608 | struct uvm_object *uobj = ufi->entry->object.uvm_obj; |
1609 | struct vm_page *uobjpage; |
1610 | int error; |
1611 | UVMHIST_FUNC("uvm_fault_lower" ); UVMHIST_CALLED(maphist); |
1612 | |
1613 | /* |
1614 | * now, if the desired page is not shadowed by the amap and we have |
1615 | * a backing object that does not have a special fault routine, then |
1616 | * we ask (with pgo_get) the object for resident pages that we care |
1617 | * about and attempt to map them in. we do not let pgo_get block |
1618 | * (PGO_LOCKED). |
1619 | */ |
1620 | |
1621 | if (uobj == NULL) { |
		/* zero fill; we don't care about neighbor pages */
1623 | uobjpage = NULL; |
1624 | } else { |
1625 | uvm_fault_lower_lookup(ufi, flt, pages); |
1626 | uobjpage = pages[flt->centeridx]; |
1627 | } |
1628 | |
1629 | /* |
1630 | * note that at this point we are done with any front or back pages. |
1631 | * we are now going to focus on the center page (i.e. the one we've |
1632 | * faulted on). if we have faulted on the upper (anon) layer |
1633 | * [i.e. case 1], then the anon we want is anons[centeridx] (we have |
1634 | * not touched it yet). if we have faulted on the bottom (uobj) |
1635 | * layer [i.e. case 2] and the page was both present and available, |
1636 | * then we've got a pointer to it as "uobjpage" and we've already |
1637 | * made it BUSY. |
1638 | */ |
1639 | |
1640 | /* |
1641 | * locked: |
1642 | * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) |
1643 | */ |
1644 | KASSERT(amap == NULL || mutex_owned(amap->am_lock)); |
1645 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
1646 | KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); |
1647 | |
1648 | /* |
1649 | * note that uobjpage can not be PGO_DONTCARE at this point. we now |
1650 | * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we |
1651 | * have a backing object, check and see if we are going to promote |
1652 | * the data up to an anon during the fault. |
1653 | */ |
1654 | |
1655 | if (uobj == NULL) { |
1656 | uobjpage = PGO_DONTCARE; |
1657 | flt->promote = true; /* always need anon here */ |
1658 | } else { |
1659 | KASSERT(uobjpage != PGO_DONTCARE); |
1660 | flt->promote = flt->cow_now && UVM_ET_ISCOPYONWRITE(ufi->entry); |
1661 | } |
1662 | UVMHIST_LOG(maphist, " case 2 fault: promote=%d, zfill=%d" , |
1663 | flt->promote, (uobj == NULL), 0,0); |
1664 | |
1665 | /* |
1666 | * if uobjpage is not null then we do not need to do I/O to get the |
1667 | * uobjpage. |
1668 | * |
1669 | * if uobjpage is null, then we need to unlock and ask the pager to |
1670 | * get the data for us. once we have the data, we need to reverify |
 *	the state of the world.  we are currently not holding any resources.
1672 | */ |
1673 | |
1674 | if (uobjpage) { |
1675 | /* update rusage counters */ |
1676 | curlwp->l_ru.ru_minflt++; |
1677 | } else { |
1678 | error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage); |
1679 | if (error != 0) |
1680 | return error; |
1681 | } |
1682 | |
1683 | /* |
1684 | * locked: |
1685 | * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) |
1686 | */ |
1687 | KASSERT(amap == NULL || mutex_owned(amap->am_lock)); |
1688 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
1689 | KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); |
1690 | |
1691 | /* |
1692 | * notes: |
1693 | * - at this point uobjpage can not be NULL |
1694 | * - at this point uobjpage can not be PG_RELEASED (since we checked |
1695 | * for it above) |
1696 | * - at this point uobjpage could be PG_WANTED (handle later) |
1697 | */ |
1698 | |
1699 | KASSERT(uobjpage != NULL); |
1700 | KASSERT(uobj == NULL || uobj == uobjpage->uobject); |
1701 | KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) || |
1702 | (uobjpage->flags & PG_CLEAN) != 0); |
1703 | |
1704 | if (!flt->promote) { |
1705 | error = uvm_fault_lower_direct(ufi, flt, uobj, uobjpage); |
1706 | } else { |
1707 | error = uvm_fault_lower_promote(ufi, flt, uobj, uobjpage); |
1708 | } |
1709 | return error; |
1710 | } |
1711 | |
1712 | /* |
1713 | * uvm_fault_lower_lookup: look up on-memory uobj pages. |
1714 | * |
1715 | * 1. get on-memory pages. |
1716 | * 2. if failed, give up (get only center page later). |
1717 | * 3. if succeeded, enter h/w mapping of neighbor pages. |
1718 | */ |
1719 | |
1720 | static void |
1721 | uvm_fault_lower_lookup( |
1722 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
1723 | struct vm_page **pages) |
1724 | { |
1725 | struct uvm_object *uobj = ufi->entry->object.uvm_obj; |
1726 | int lcv, gotpages; |
1727 | vaddr_t currva; |
	UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist);
1729 | |
1730 | mutex_enter(uobj->vmobjlock); |
1731 | /* Locked: maps(read), amap(if there), uobj */ |
1732 | |
1733 | uvmexp.fltlget++; |
1734 | gotpages = flt->npages; |
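	/*
	 * gotpages is an in/out argument to pgo_get: on entry it is the
	 * number of pages we are asking for, on return it is the number
	 * of resident pages the pager was able to hand back.
	 */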
1735 | (void) uobj->pgops->pgo_get(uobj, |
1736 | ufi->entry->offset + flt->startva - ufi->entry->start, |
1737 | pages, &gotpages, flt->centeridx, |
1738 | flt->access_type & MASK(ufi->entry), ufi->entry->advice, PGO_LOCKED); |
1739 | |
1740 | KASSERT(mutex_owned(uobj->vmobjlock)); |
1741 | |
1742 | /* |
1743 | * check for pages to map, if we got any |
1744 | */ |
1745 | |
1746 | if (gotpages == 0) { |
1747 | pages[flt->centeridx] = NULL; |
1748 | return; |
1749 | } |
1750 | |
1751 | currva = flt->startva; |
1752 | for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { |
1753 | struct vm_page *curpg; |
1754 | |
1755 | curpg = pages[lcv]; |
1756 | if (curpg == NULL || curpg == PGO_DONTCARE) { |
1757 | continue; |
1758 | } |
1759 | KASSERT(curpg->uobject == uobj); |
1760 | |
1761 | /* |
1762 | * if center page is resident and not PG_BUSY|PG_RELEASED |
1763 | * then pgo_get made it PG_BUSY for us and gave us a handle |
1764 | * to it. |
1765 | */ |
1766 | |
1767 | if (lcv == flt->centeridx) { |
1768 | UVMHIST_LOG(maphist, " got uobjpage " |
1769 | "(0x%x) with locked get" , |
1770 | curpg, 0,0,0); |
1771 | } else { |
1772 | bool readonly = (curpg->flags & PG_RDONLY) |
1773 | || (curpg->loan_count > 0) |
1774 | || UVM_OBJ_NEEDS_WRITEFAULT(curpg->uobject); |
1775 | |
1776 | uvm_fault_lower_neighbor(ufi, flt, |
1777 | currva, curpg, readonly); |
1778 | } |
1779 | } |
1780 | pmap_update(ufi->orig_map->pmap); |
1781 | } |
1782 | |
1783 | /* |
1784 | * uvm_fault_lower_neighbor: enter h/w mapping of lower neighbor page. |
1785 | */ |
1786 | |
1787 | static void |
1788 | uvm_fault_lower_neighbor( |
1789 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
1790 | vaddr_t currva, struct vm_page *pg, bool readonly) |
1791 | { |
1792 | UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); |
1793 | |
1794 | /* locked: maps(read), amap(if there), uobj */ |
1795 | |
1796 | /* |
1797 | * calling pgo_get with PGO_LOCKED returns us pages which |
1798 | * are neither busy nor released, so we don't need to check |
1799 | * for this. we can just directly enter the pages. |
1800 | */ |
1801 | |
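	/*
	 * make sure the page is on a paging queue so the pagedaemon can
	 * find it later; uvm_pageenqueue() is a no-op if the page is
	 * already queued or is wired.
	 */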
1802 | mutex_enter(&uvm_pageqlock); |
1803 | uvm_pageenqueue(pg); |
1804 | mutex_exit(&uvm_pageqlock); |
1805 | UVMHIST_LOG(maphist, |
1806 | " MAPPING: n obj: pm=%p, va=%#lx, pg=%p" , |
1807 | ufi->orig_map->pmap, currva, pg, 0); |
1808 | uvmexp.fltnomap++; |
1809 | |
1810 | /* |
1811 | * Since this page isn't the page that's actually faulting, |
1812 | * ignore pmap_enter() failures; it's not critical that we |
1813 | * enter these right now. |
1814 | * NOTE: page can't be PG_WANTED or PG_RELEASED because we've |
1815 | * held the lock the whole time we've had the handle. |
1816 | */ |
1817 | KASSERT((pg->flags & PG_PAGEOUT) == 0); |
1818 | KASSERT((pg->flags & PG_RELEASED) == 0); |
1819 | KASSERT((pg->flags & PG_WANTED) == 0); |
1820 | KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) || (pg->flags & PG_CLEAN) != 0); |
1821 | pg->flags &= ~(PG_BUSY); |
1822 | UVM_PAGE_OWN(pg, NULL); |
1823 | |
1824 | KASSERT(mutex_owned(pg->uobject->vmobjlock)); |
1825 | (void) pmap_enter(ufi->orig_map->pmap, currva, |
1826 | VM_PAGE_TO_PHYS(pg), |
1827 | readonly ? (flt->enter_prot & ~VM_PROT_WRITE) : |
1828 | flt->enter_prot & MASK(ufi->entry), |
1829 | PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0)); |
1830 | } |
1831 | |
1832 | /* |
1833 | * uvm_fault_lower_io: get lower page from backing store. |
1834 | * |
1835 | * 1. unlock everything, because i/o will block. |
1836 | * 2. call pgo_get. |
1837 | * 3. if failed, recover. |
1838 | * 4. if succeeded, relock everything and verify things. |
1839 | */ |
1840 | |
1841 | static int |
1842 | uvm_fault_lower_io( |
1843 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
1844 | struct uvm_object **ruobj, struct vm_page **ruobjpage) |
1845 | { |
1846 | struct vm_amap * const amap = ufi->entry->aref.ar_amap; |
1847 | struct uvm_object *uobj = *ruobj; |
1848 | struct vm_page *pg; |
1849 | bool locked; |
1850 | int gotpages; |
1851 | int error; |
1852 | voff_t uoff; |
	UVMHIST_FUNC("uvm_fault_lower_io"); UVMHIST_CALLED(maphist);
1854 | |
1855 | /* update rusage counters */ |
1856 | curlwp->l_ru.ru_majflt++; |
1857 | |
1858 | /* Locked: maps(read), amap(if there), uobj */ |
1859 | uvmfault_unlockall(ufi, amap, NULL); |
1860 | |
1861 | /* Locked: uobj */ |
1862 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
1863 | |
1864 | uvmexp.fltget++; |
1865 | gotpages = 1; |
1866 | pg = NULL; |
1867 | uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset; |
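	/*
	 * uoff is the offset into uobj of the faulting address; the
	 * PGO_SYNCIO request below asks only for the single center page
	 * and may sleep while the pager does I/O.
	 */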
1868 | error = uobj->pgops->pgo_get(uobj, uoff, &pg, &gotpages, |
1869 | 0, flt->access_type & MASK(ufi->entry), ufi->entry->advice, |
1870 | PGO_SYNCIO); |
1871 | /* locked: pg(if no error) */ |
1872 | |
1873 | /* |
1874 | * recover from I/O |
1875 | */ |
1876 | |
1877 | if (error) { |
1878 | if (error == EAGAIN) { |
1879 | UVMHIST_LOG(maphist, |
1880 | " pgo_get says TRY AGAIN!" ,0,0,0,0); |
1881 | kpause("fltagain2" , false, hz/2, NULL); |
1882 | return ERESTART; |
1883 | } |
1884 | |
1885 | #if 0 |
1886 | KASSERT(error != ERESTART); |
1887 | #else |
1888 | /* XXXUEBS don't re-fault? */ |
1889 | if (error == ERESTART) |
1890 | error = EIO; |
1891 | #endif |
1892 | |
		UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)",
1894 | error, 0,0,0); |
1895 | return error; |
1896 | } |
1897 | |
1898 | /* |
1899 | * re-verify the state of the world by first trying to relock |
1900 | * the maps. always relock the object. |
1901 | */ |
1902 | |
1903 | locked = uvmfault_relock(ufi); |
1904 | if (locked && amap) |
1905 | amap_lock(amap); |
1906 | |
	/* the page's object might have changed while we slept */
1908 | uobj = pg->uobject; |
1909 | |
1910 | mutex_enter(uobj->vmobjlock); |
1911 | KASSERT((pg->flags & PG_BUSY) != 0); |
1912 | |
1913 | mutex_enter(&uvm_pageqlock); |
1914 | uvm_pageactivate(pg); |
1915 | mutex_exit(&uvm_pageqlock); |
1916 | |
1917 | /* locked(locked): maps(read), amap(if !null), uobj, pg */ |
1918 | /* locked(!locked): uobj, pg */ |
1919 | |
1920 | /* |
 * verify that the page has not been released and re-verify that
 * the amap slot is still free.  if there is a problem, we unlock
 * and clean up.
1924 | */ |
1925 | |
1926 | if ((pg->flags & PG_RELEASED) != 0 || |
1927 | (locked && amap && amap_lookup(&ufi->entry->aref, |
1928 | ufi->orig_rvaddr - ufi->entry->start))) { |
1929 | if (locked) |
1930 | uvmfault_unlockall(ufi, amap, NULL); |
1931 | locked = false; |
1932 | } |
1933 | |
1934 | /* |
1935 | * didn't get the lock? release the page and retry. |
1936 | */ |
1937 | |
1938 | if (locked == false) { |
1939 | UVMHIST_LOG(maphist, |
1940 | " wasn't able to relock after fault: retry" , |
1941 | 0,0,0,0); |
1942 | if (pg->flags & PG_WANTED) { |
1943 | wakeup(pg); |
1944 | } |
1945 | if ((pg->flags & PG_RELEASED) == 0) { |
1946 | pg->flags &= ~(PG_BUSY | PG_WANTED); |
1947 | UVM_PAGE_OWN(pg, NULL); |
1948 | } else { |
1949 | uvmexp.fltpgrele++; |
1950 | uvm_pagefree(pg); |
1951 | } |
1952 | mutex_exit(uobj->vmobjlock); |
1953 | return ERESTART; |
1954 | } |
1955 | |
1956 | /* |
1957 | * we have the data in pg which is busy and |
1958 | * not released. we are holding object lock (so the page |
1959 | * can't be released on us). |
1960 | */ |
1961 | |
1962 | /* locked: maps(read), amap(if !null), uobj, pg */ |
1963 | |
1964 | *ruobj = uobj; |
1965 | *ruobjpage = pg; |
1966 | return 0; |
1967 | } |
1968 | |
1969 | /* |
1970 | * uvm_fault_lower_direct: fault lower center page |
1971 | * |
1972 | * 1. adjust flt->enter_prot. |
1973 | * 2. if page is loaned, resolve. |
1974 | */ |
1975 | |
1976 | int |
1977 | uvm_fault_lower_direct( |
1978 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
1979 | struct uvm_object *uobj, struct vm_page *uobjpage) |
1980 | { |
1981 | struct vm_page *pg; |
	UVMHIST_FUNC("uvm_fault_lower_direct"); UVMHIST_CALLED(maphist);
1983 | |
1984 | /* |
1985 | * we are not promoting. if the mapping is COW ensure that we |
1986 | * don't give more access than we should (e.g. when doing a read |
1987 | * fault on a COPYONWRITE mapping we want to map the COW page in |
1988 | * R/O even though the entry protection could be R/W). |
1989 | * |
1990 | * set "pg" to the page we want to map in (uobjpage, usually) |
1991 | */ |
1992 | |
1993 | uvmexp.flt_obj++; |
1994 | if (UVM_ET_ISCOPYONWRITE(ufi->entry) || |
1995 | UVM_OBJ_NEEDS_WRITEFAULT(uobjpage->uobject)) |
1996 | flt->enter_prot &= ~VM_PROT_WRITE; |
1997 | pg = uobjpage; /* map in the actual object */ |
1998 | |
1999 | KASSERT(uobjpage != PGO_DONTCARE); |
2000 | |
2001 | /* |
2002 | * we are faulting directly on the page. be careful |
2003 | * about writing to loaned pages... |
2004 | */ |
2005 | |
2006 | if (uobjpage->loan_count) { |
2007 | uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage); |
2008 | } |
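	/*
	 * if the loan was broken, uvm_fault_lower_direct_loan() replaced
	 * both pg and uobjpage with the new un-loaned page, so the two
	 * still match.
	 */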
2009 | KASSERT(pg == uobjpage); |
2010 | |
2011 | KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); |
2012 | return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg); |
2013 | } |
2014 | |
2015 | /* |
2016 | * uvm_fault_lower_direct_loan: resolve loaned page. |
2017 | * |
2018 | * 1. if not cow'ing, adjust flt->enter_prot. |
2019 | * 2. if cow'ing, break loan. |
2020 | */ |
2021 | |
2022 | static int |
2023 | uvm_fault_lower_direct_loan( |
2024 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
2025 | struct uvm_object *uobj, struct vm_page **rpg, |
2026 | struct vm_page **ruobjpage) |
2027 | { |
2028 | struct vm_amap * const amap = ufi->entry->aref.ar_amap; |
2029 | struct vm_page *pg; |
2030 | struct vm_page *uobjpage = *ruobjpage; |
	UVMHIST_FUNC("uvm_fault_lower_direct_loan"); UVMHIST_CALLED(maphist);
2032 | |
2033 | if (!flt->cow_now) { |
		/* read fault: cap the protection at read-only */
2036 | flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; |
2037 | } else { |
2038 | /* write fault: must break the loan here */ |
2039 | |
2040 | pg = uvm_loanbreak(uobjpage); |
2041 | if (pg == NULL) { |
2042 | |
2043 | /* |
2044 | * drop ownership of page, it can't be released |
2045 | */ |
2046 | |
2047 | if (uobjpage->flags & PG_WANTED) |
2048 | wakeup(uobjpage); |
2049 | uobjpage->flags &= ~(PG_BUSY|PG_WANTED); |
2050 | UVM_PAGE_OWN(uobjpage, NULL); |
2051 | |
2052 | uvmfault_unlockall(ufi, amap, uobj); |
2053 | UVMHIST_LOG(maphist, |
2054 | " out of RAM breaking loan, waiting" , |
2055 | 0,0,0,0); |
2056 | uvmexp.fltnoram++; |
			uvm_wait("flt_noram4");
2058 | return ERESTART; |
2059 | } |
2060 | *rpg = pg; |
2061 | *ruobjpage = pg; |
2062 | } |
2063 | return 0; |
2064 | } |
2065 | |
2066 | /* |
2067 | * uvm_fault_lower_promote: promote lower page. |
2068 | * |
2069 | * 1. call uvmfault_promote. |
2070 | * 2. fill in data. |
2071 | * 3. if not ZFOD, dispose old page. |
2072 | */ |
2073 | |
2074 | int |
2075 | uvm_fault_lower_promote( |
2076 | struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, |
2077 | struct uvm_object *uobj, struct vm_page *uobjpage) |
2078 | { |
2079 | struct vm_amap * const amap = ufi->entry->aref.ar_amap; |
2080 | struct vm_anon *anon; |
2081 | struct vm_page *pg; |
2082 | int error; |
	UVMHIST_FUNC("uvm_fault_lower_promote"); UVMHIST_CALLED(maphist);
2084 | |
2085 | KASSERT(amap != NULL); |
2086 | |
2087 | /* |
2088 | * If we are going to promote the data to an anon we |
2089 | * allocate a blank anon here and plug it into our amap. |
2090 | */ |
2091 | error = uvmfault_promote(ufi, NULL, uobjpage, |
2092 | &anon, &flt->anon_spare); |
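	/*
	 * ERESTART from uvmfault_promote() means it had to drop our locks
	 * (e.g. to wait for memory), so the fault must be retried from
	 * the top.
	 */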
2093 | switch (error) { |
2094 | case 0: |
2095 | break; |
2096 | case ERESTART: |
2097 | return ERESTART; |
2098 | default: |
2099 | return error; |
2100 | } |
2101 | |
2102 | pg = anon->an_page; |
2103 | |
2104 | /* |
2105 | * Fill in the data. |
2106 | */ |
2107 | KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); |
2108 | |
2109 | if (uobjpage != PGO_DONTCARE) { |
2110 | uvmexp.flt_prcopy++; |
2111 | |
2112 | /* |
2113 | * promote to shared amap? make sure all sharing |
2114 | * procs see it |
2115 | */ |
2116 | |
2117 | if ((amap_flags(amap) & AMAP_SHARED) != 0) { |
2118 | pmap_page_protect(uobjpage, VM_PROT_NONE); |
2119 | /* |
2120 | * XXX: PAGE MIGHT BE WIRED! |
2121 | */ |
2122 | } |
2123 | |
2124 | /* |
2125 | * dispose of uobjpage. it can't be PG_RELEASED |
2126 | * since we still hold the object lock. |
2127 | */ |
2128 | |
2129 | if (uobjpage->flags & PG_WANTED) { |
2130 | /* still have the obj lock */ |
2131 | wakeup(uobjpage); |
2132 | } |
2133 | uobjpage->flags &= ~(PG_BUSY|PG_WANTED); |
2134 | UVM_PAGE_OWN(uobjpage, NULL); |
2135 | |
2136 | UVMHIST_LOG(maphist, |
2137 | " promote uobjpage 0x%x to anon/page 0x%x/0x%x" , |
2138 | uobjpage, anon, pg, 0); |
2139 | |
2140 | } else { |
2141 | uvmexp.flt_przero++; |
2142 | |
2143 | /* |
2144 | * Page is zero'd and marked dirty by |
2145 | * uvmfault_promote(). |
2146 | */ |
2147 | |
		UVMHIST_LOG(maphist, " zero fill anon/page 0x%x/0x%x",
2149 | anon, pg, 0, 0); |
2150 | } |
2151 | |
2152 | return uvm_fault_lower_enter(ufi, flt, uobj, anon, pg); |
2153 | } |
2154 | |
2155 | /* |
2156 | * uvm_fault_lower_enter: enter h/w mapping of lower page or anon page promoted |
2157 | * from the lower page. |
2158 | */ |
2159 | |
2160 | int |
2161 | uvm_fault_lower_enter( |
2162 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
2163 | struct uvm_object *uobj, |
2164 | struct vm_anon *anon, struct vm_page *pg) |
2165 | { |
2166 | struct vm_amap * const amap = ufi->entry->aref.ar_amap; |
2167 | int error; |
	UVMHIST_FUNC("uvm_fault_lower_enter"); UVMHIST_CALLED(maphist);
2169 | |
2170 | /* |
2171 | * Locked: |
2172 | * |
2173 | * maps(read), amap(if !null), uobj(if !null), |
2174 | * anon(if !null), pg(if anon), unlock_uobj(if !null) |
2175 | * |
2176 | * Note: pg is either the uobjpage or the new page in the new anon. |
2177 | */ |
2178 | KASSERT(amap == NULL || mutex_owned(amap->am_lock)); |
2179 | KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); |
2180 | KASSERT(anon == NULL || anon->an_lock == amap->am_lock); |
2181 | KASSERT((pg->flags & PG_BUSY) != 0); |
2182 | |
2183 | /* |
2184 | * all resources are present. we can now map it in and free our |
2185 | * resources. |
2186 | */ |
2187 | |
2188 | UVMHIST_LOG(maphist, |
2189 | " MAPPING: case2: pm=%p, va=%#lx, pg=%#x, promote=%d" , |
2190 | ufi->orig_map->pmap, ufi->orig_rvaddr, pg, flt->promote); |
2191 | KASSERT((flt->access_type & VM_PROT_WRITE) == 0 || |
2192 | (pg->flags & PG_RDONLY) == 0); |
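	/*
	 * enter PG_RDONLY pages read-only so that a later write access
	 * will fault again; the assertion above guarantees that a write
	 * fault never reaches this point with a PG_RDONLY page.
	 */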
2193 | if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, |
2194 | VM_PAGE_TO_PHYS(pg), |
2195 | (pg->flags & PG_RDONLY) != 0 ? |
2196 | flt->enter_prot & ~VM_PROT_WRITE : flt->enter_prot, |
2197 | flt->access_type | PMAP_CANFAIL | |
2198 | (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) { |
2199 | |
2200 | /* |
2201 | * No need to undo what we did; we can simply think of |
2202 | * this as the pmap throwing away the mapping information. |
2203 | * |
2204 | * We do, however, have to go through the ReFault path, |
2205 | * as the map may change while we're asleep. |
2206 | */ |
2207 | |
2208 | /* |
2209 | * ensure that the page is queued in the case that |
2210 | * we just promoted the page. |
2211 | */ |
2212 | |
2213 | mutex_enter(&uvm_pageqlock); |
2214 | uvm_pageenqueue(pg); |
2215 | mutex_exit(&uvm_pageqlock); |
2216 | |
2217 | if (pg->flags & PG_WANTED) |
2218 | wakeup(pg); |
2219 | |
2220 | /* |
2221 | * note that pg can't be PG_RELEASED since we did not drop |
2222 | * the object lock since the last time we checked. |
2223 | */ |
2224 | KASSERT((pg->flags & PG_RELEASED) == 0); |
2225 | |
2226 | pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); |
2227 | UVM_PAGE_OWN(pg, NULL); |
2228 | |
2229 | uvmfault_unlockall(ufi, amap, uobj); |
2230 | if (!uvm_reclaimable()) { |
2231 | UVMHIST_LOG(maphist, |
2232 | "<- failed. out of VM" ,0,0,0,0); |
2233 | /* XXX instrumentation */ |
2234 | error = ENOMEM; |
2235 | return error; |
2236 | } |
2237 | /* XXX instrumentation */ |
		uvm_wait("flt_pmfail2");
2239 | return ERESTART; |
2240 | } |
2241 | |
2242 | uvm_fault_lower_done(ufi, flt, uobj, pg); |
2243 | |
2244 | /* |
2245 | * note that pg can't be PG_RELEASED since we did not drop the object |
2246 | * lock since the last time we checked. |
2247 | */ |
2248 | KASSERT((pg->flags & PG_RELEASED) == 0); |
2249 | if (pg->flags & PG_WANTED) |
2250 | wakeup(pg); |
2251 | pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); |
2252 | UVM_PAGE_OWN(pg, NULL); |
2253 | |
2254 | pmap_update(ufi->orig_map->pmap); |
2255 | uvmfault_unlockall(ufi, amap, uobj); |
2256 | |
	UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0);
2258 | return 0; |
2259 | } |
2260 | |
2261 | /* |
2262 | * uvm_fault_lower_done: queue lower center page. |
2263 | */ |
2264 | |
2265 | void |
2266 | uvm_fault_lower_done( |
2267 | struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, |
2268 | struct uvm_object *uobj, struct vm_page *pg) |
2269 | { |
2270 | bool dropswap = false; |
2271 | |
	UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist);
2273 | |
2274 | mutex_enter(&uvm_pageqlock); |
2275 | if (flt->wire_paging) { |
2276 | uvm_pagewire(pg); |
2277 | if (pg->pqflags & PQ_AOBJ) { |
2278 | |
2279 | /* |
2280 | * since the now-wired page cannot be paged out, |
2281 | * release its swap resources for others to use. |
2282 | * since an aobj page with no swap cannot be PG_CLEAN, |
2283 | * clear its clean flag now. |
2284 | */ |
2285 | |
2286 | KASSERT(uobj != NULL); |
2287 | pg->flags &= ~(PG_CLEAN); |
2288 | dropswap = true; |
2289 | } |
2290 | } else { |
2291 | uvm_pageactivate(pg); |
2292 | } |
2293 | mutex_exit(&uvm_pageqlock); |
2294 | |
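	/*
	 * release the page's swap slot, if any, now that the page queues
	 * are unlocked; uao_dropswap() takes a page index, hence the shift.
	 */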
2295 | if (dropswap) { |
2296 | uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); |
2297 | } |
2298 | } |
2299 | |
2300 | |
2301 | /* |
2302 | * uvm_fault_wire: wire down a range of virtual addresses in a map. |
2303 | * |
2304 | * => map may be read-locked by caller, but MUST NOT be write-locked. |
2305 | * => if map is read-locked, any operations which may cause map to |
2306 | * be write-locked in uvm_fault() must be taken care of by |
2307 | * the caller. See uvm_map_pageable(). |
2308 | */ |
2309 | |
2310 | int |
2311 | uvm_fault_wire(struct vm_map *map, vaddr_t start, vaddr_t end, |
2312 | vm_prot_t access_type, int maxprot) |
2313 | { |
2314 | vaddr_t va; |
2315 | int error; |
2316 | |
2317 | /* |
2318 | * now fault it in a page at a time. if the fault fails then we have |
 * to undo what we have done.  note that when UVM_FAULT_MAXPROT is
 * passed (i.e. when the maxprot argument is set), uvm_fault_internal()
 * checks access against the entry's maximum protection.
2321 | */ |
2322 | |
2323 | /* |
2324 | * XXX work around overflowing a vaddr_t. this prevents us from |
2325 | * wiring the last page in the address space, though. |
2326 | */ |
2327 | if (start > end) { |
2328 | return EFAULT; |
2329 | } |
2330 | |
2331 | for (va = start; va < end; va += PAGE_SIZE) { |
2332 | error = uvm_fault_internal(map, va, access_type, |
2333 | (maxprot ? UVM_FAULT_MAXPROT : 0) | UVM_FAULT_WIRE); |
2334 | if (error) { |
2335 | if (va != start) { |
2336 | uvm_fault_unwire(map, start, va); |
2337 | } |
2338 | return error; |
2339 | } |
2340 | } |
2341 | return 0; |
2342 | } |
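
/*
 * Illustrative (hypothetical) caller, not taken from in-tree code: code
 * that needs a range of user virtual memory to stay resident for the
 * duration of an operation might do something like
 *
 *	error = uvm_fault_wire(map, start, end,
 *	    VM_PROT_READ | VM_PROT_WRITE, 0);
 *	if (error == 0) {
 *		... operate on the wired range ...
 *		uvm_fault_unwire(map, start, end);
 *	}
 *
 * see uvm_vslock()/uvm_vsunlock() and uvm_map_pageable() for real users.
 */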
2343 | |
2344 | /* |
2345 | * uvm_fault_unwire(): unwire range of virtual space. |
2346 | */ |
2347 | |
2348 | void |
2349 | uvm_fault_unwire(struct vm_map *map, vaddr_t start, vaddr_t end) |
2350 | { |
2351 | vm_map_lock_read(map); |
2352 | uvm_fault_unwire_locked(map, start, end); |
2353 | vm_map_unlock_read(map); |
2354 | } |
2355 | |
2356 | /* |
2357 | * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire(). |
2358 | * |
2359 | * => map must be at least read-locked. |
2360 | */ |
2361 | |
2362 | void |
2363 | uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end) |
2364 | { |
2365 | struct vm_map_entry *entry, *oentry; |
2366 | pmap_t pmap = vm_map_pmap(map); |
2367 | vaddr_t va; |
2368 | paddr_t pa; |
2369 | struct vm_page *pg; |
2370 | |
2371 | /* |
2372 | * we assume that the area we are unwiring has actually been wired |
2373 | * in the first place. this means that we should be able to extract |
2374 | * the PAs from the pmap. we also lock out the page daemon so that |
2375 | * we can call uvm_pageunwire. |
2376 | */ |
2377 | |
2378 | /* |
2379 | * find the beginning map entry for the region. |
2380 | */ |
2381 | |
2382 | KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map)); |
2383 | if (uvm_map_lookup_entry(map, start, &entry) == false) |
		panic("uvm_fault_unwire_locked: address not in map");
2385 | |
2386 | oentry = NULL; |
2387 | for (va = start; va < end; va += PAGE_SIZE) { |
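		/* nothing mapped at this address => nothing was wired here */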
2388 | if (pmap_extract(pmap, va, &pa) == false) |
2389 | continue; |
2390 | |
2391 | /* |
2392 | * find the map entry for the current address. |
2393 | */ |
2394 | |
2395 | KASSERT(va >= entry->start); |
2396 | while (va >= entry->end) { |
2397 | KASSERT(entry->next != &map->header && |
2398 | entry->next->start <= entry->end); |
2399 | entry = entry->next; |
2400 | } |
2401 | |
		/*
		 * lock the entry (and the page queues).  we only need to
		 * re-lock when we cross into a different map entry.
		 */
2405 | |
2406 | if (entry != oentry) { |
2407 | if (oentry != NULL) { |
2408 | mutex_exit(&uvm_pageqlock); |
2409 | uvm_map_unlock_entry(oentry); |
2410 | } |
2411 | uvm_map_lock_entry(entry); |
2412 | mutex_enter(&uvm_pageqlock); |
2413 | oentry = entry; |
2414 | } |
2415 | |
2416 | /* |
2417 | * if the entry is no longer wired, tell the pmap. |
2418 | */ |
2419 | |
2420 | if (VM_MAPENT_ISWIRED(entry) == 0) |
2421 | pmap_unwire(pmap, va); |
2422 | |
2423 | pg = PHYS_TO_VM_PAGE(pa); |
2424 | if (pg) |
2425 | uvm_pageunwire(pg); |
2426 | } |
2427 | |
2428 | if (oentry != NULL) { |
2429 | mutex_exit(&uvm_pageqlock); |
2430 | uvm_map_unlock_entry(entry); |
2431 | } |
2432 | } |
2433 | |