/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.17 2012/01/30 17:21:52 para Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.17 2012/01/30 17:21:52 para Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>

#endif /* defined(PDSIM) */

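/*
 * CLOCK page replacement policy.  Pageable pages live on one of two
 * queues: an active queue of pages in use and an inactive queue of
 * pages between the clock hands that are candidates for reclaim.
 * The uvm_pctparam knobs below bound how much anonymous, file and
 * executable data the pagedaemon may evict.
 */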
#define	PQ_INACTIVE	PQ_PRIVATE1	/* page is in inactive list */
#define	PQ_ACTIVE	PQ_PRIVATE2	/* page is in active list */

#if !defined(CLOCK_INACTIVEPCT)
#define	CLOCK_INACTIVEPCT	33	/* default inactive target, in % */
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
	struct pglist s_activeq;	/* allocated pages, in use */
	struct pglist s_inactiveq;	/* pages between the clock hands */
	int s_active;			/* number of pages on s_activeq */
	int s_inactive;			/* number of pages on s_inactiveq */
	int s_inactarg;			/* target size of s_inactiveq */
	struct uvm_pctparam s_anonmin;	/* min % of memory for anon data */
	struct uvm_pctparam s_filemin;	/* min % of memory for file data */
	struct uvm_pctparam s_execmin;	/* min % of memory for exec data */
	struct uvm_pctparam s_anonmax;	/* max % of memory for anon data */
	struct uvm_pctparam s_filemax;	/* max % of memory for file data */
	struct uvm_pctparam s_execmax;	/* max % of memory for exec data */
	struct uvm_pctparam s_inactivepct; /* target % of pages inactive */
};

struct uvmpdpol_scanstate {
	bool ss_first;			/* true if the scan just started */
	bool ss_anonreact, ss_filereact, ss_execreact; /* reactivate type? */
	struct vm_page *ss_nextpg;	/* next page to examine */
};

static struct uvmpdpol_globalstate pdpol_state;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

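/*
 * clock_tune: recompute the inactive-queue target as s_inactivepct
 * percent of all pageable (active + inactive) pages, but always keep
 * it above the free-page target so the pagedaemon has pages to scan.
 */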
static void
clock_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
	    s->s_active + s->s_inactive);
	if (s->s_inactarg <= uvmexp.freetarg) {
		s->s_inactarg = uvmexp.freetarg + 1;
	}
}

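/*
 * uvmpdpol_scaninit: prepare per-scan state before the pagedaemon
 * walks the inactive queue, deciding which page types must be
 * reactivated rather than freed on this pass.
 */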
void
uvmpdpol_scaninit(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	int t;
	bool anonunder, fileunder, execunder;
	bool anonover, fileover, execover;
	bool anonreact, filereact, execreact;

	/*
	 * decide which types of pages we want to reactivate instead of
	 * freeing to keep usage within the minimum and maximum usage limits.
	 */

	t = s->s_active + s->s_inactive + uvmexp.free;
	anonunder = uvmexp.anonpages <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
	fileunder = uvmexp.filepages <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
	execunder = uvmexp.execpages <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
	anonover = uvmexp.anonpages > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
	fileover = uvmexp.filepages > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
	execover = uvmexp.execpages > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
	anonreact = anonunder || (!anonover && (fileover || execover));
	filereact = fileunder || (!fileover && (anonover || execover));
	execreact = execunder || (!execover && (anonover || fileover));
	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
		anonreact = filereact = execreact = false;
	}
	ss->ss_anonreact = anonreact;
	ss->ss_filereact = filereact;
	ss->ss_execreact = execreact;

	ss->ss_first = true;
}

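/*
 * uvmpdpol_selectvictim: return the next inactive page that may be
 * reclaimed, or NULL once the inactive queue is exhausted.  Pages
 * found to be referenced, and pages whose type is protected by the
 * reactivation decisions made in uvmpdpol_scaninit(), are moved back
 * to the active queue and skipped.
 */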
struct vm_page *
uvmpdpol_selectvictim(void)
{
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	struct vm_page *pg;
	kmutex_t *lock;

	KASSERT(mutex_owned(&uvm_pageqlock));

	while (/* CONSTCOND */ 1) {
		struct vm_anon *anon;
		struct uvm_object *uobj;

		if (ss->ss_first) {
			pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
			ss->ss_first = false;
		} else {
			pg = ss->ss_nextpg;
			if (pg != NULL && (pg->pqflags & PQ_INACTIVE) == 0) {
				pg = TAILQ_FIRST(&pdpol_state.s_inactiveq);
			}
		}
		if (pg == NULL) {
			break;
		}
		ss->ss_nextpg = TAILQ_NEXT(pg, pageq.queue);

		uvmexp.pdscans++;

		/*
		 * move referenced pages back to active queue and
		 * skip to next page.
		 */

		lock = uvmpd_trylockowner(pg);
		if (lock != NULL) {
			if (pmap_is_referenced(pg)) {
				uvmpdpol_pageactivate(pg);
				uvmexp.pdreact++;
				mutex_exit(lock);
				continue;
			}
			mutex_exit(lock);
		}

		anon = pg->uanon;
		uobj = pg->uobject;

		/*
		 * enforce the minimum thresholds on different
		 * types of memory usage.  if reusing the current
		 * page would reduce that type of usage below its
		 * minimum, reactivate the page instead and move
		 * on to the next page.
		 */

		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
			uvmpdpol_pageactivate(pg);
			PDPOL_EVCNT_INCR(reactexec);
			continue;
		}
		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
			uvmpdpol_pageactivate(pg);
			PDPOL_EVCNT_INCR(reactfile);
			continue;
		}
		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
			uvmpdpol_pageactivate(pg);
			PDPOL_EVCNT_INCR(reactanon);
			continue;
		}

		break;
	}

	return pg;
}

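/*
 * uvmpdpol_balancequeue: called after the reclaim scan; walk the
 * active queue, deactivating pages until the inactive target is met
 * and, while swap space is short, trying to drop the swap slots
 * backing active swap-backed pages.
 */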
void
uvmpdpol_balancequeue(int swap_shortage)
{
	int inactive_shortage;
	struct vm_page *p, *nextpg;
	kmutex_t *lock;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */

	inactive_shortage = pdpol_state.s_inactarg - pdpol_state.s_inactive;
	for (p = TAILQ_FIRST(&pdpol_state.s_activeq);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq.queue);

		/*
		 * if there's a shortage of swap slots, try to free the
		 * swap space backing this page.
		 */

		if (swap_shortage > 0 && (p->pqflags & PQ_SWAPBACKED) != 0) {
			if (uvmpd_trydropswap(p)) {
				swap_shortage--;
			}
		}

		/*
		 * if there's a shortage of inactive pages, deactivate.
		 */

		if (inactive_shortage <= 0) {
			continue;
		}

		/* no need to check wire_count as pg is "active" */
		lock = uvmpd_trylockowner(p);
		if (lock != NULL) {
			uvmpdpol_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
			mutex_exit(lock);
		}
	}
}

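/*
 * uvmpdpol_pagedeactivate: move a page to the tail of the inactive
 * queue, clearing its pmap reference bit so that a later scan can
 * tell whether it has been used since.
 */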
void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_locked_p(pg));
	KASSERT(mutex_owned(&uvm_pageqlock));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
		pg->pqflags &= ~PQ_ACTIVE;
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		pmap_clear_reference(pg);
		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pageq.queue);
		pg->pqflags |= PQ_INACTIVE;
		pdpol_state.s_inactive++;
	}
}

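/*
 * uvmpdpol_pageactivate: move a page to the tail of the active
 * queue, removing it from whichever queue it was on.
 */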
void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	uvmpdpol_pagedequeue(pg);
	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pageq.queue);
	pg->pqflags |= PQ_ACTIVE;
	pdpol_state.s_active++;
}

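/*
 * uvmpdpol_pagedequeue: remove a page from whichever paging queue
 * it is currently on, if any.
 */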
void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	if (pg->pqflags & PQ_ACTIVE) {
		KASSERT(mutex_owned(&uvm_pageqlock));
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
		pg->pqflags &= ~PQ_ACTIVE;
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		KASSERT(mutex_owned(&uvm_pageqlock));
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pageq.queue);
		pg->pqflags &= ~PQ_INACTIVE;
		KASSERT(pdpol_state.s_inactive > 0);
		pdpol_state.s_inactive--;
	}
}

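/*
 * uvmpdpol_pageenqueue: put a page onto the paging queues; this
 * policy simply treats it as an activation.
 */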
void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

	uvmpdpol_pageactivate(pg);
}

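/*
 * uvmpdpol_anfree: hook invoked when an anon is freed; nothing to
 * do for this policy.
 */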
void
uvmpdpol_anfree(struct vm_anon *an)
{
}

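/*
 * uvmpdpol_pageisqueued_p: return true if the page is on one of the
 * paging queues.
 */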
bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	return (pg->pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
}

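/*
 * uvmpdpol_estimatepageable: report the current sizes of the active
 * and inactive queues.
 */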
void
uvmpdpol_estimatepageable(int *active, int *inactive)
{

	if (active) {
		*active = pdpol_state.s_active;
	}
	if (inactive) {
		*inactive = pdpol_state.s_inactive;
	}
}

#if !defined(PDSIM)
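/*
 * min_check: sysctl validator for the *min knobs; reject a proposed
 * value if the three minimum percentages would sum to more than 95%.
 */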
static int
min_check(struct uvm_pctparam *pct, int t)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int total = t;

	if (pct != &s->s_anonmin) {
		total += uvm_pctparam_get(&s->s_anonmin);
	}
	if (pct != &s->s_filemin) {
		total += uvm_pctparam_get(&s->s_filemin);
	}
	if (pct != &s->s_execmin) {
		total += uvm_pctparam_get(&s->s_execmin);
	}
	if (total > 95) {
		return EINVAL;
	}
	return 0;
}
#endif /* !defined(PDSIM) */

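/*
 * uvmpdpol_init: initialize the queues and the default percentage
 * parameters.
 */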
void
uvmpdpol_init(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	TAILQ_INIT(&s->s_activeq);
	TAILQ_INIT(&s->s_inactiveq);
	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
	uvm_pctparam_init(&s->s_filemin, 10, min_check);
	uvm_pctparam_init(&s->s_execmin, 5, min_check);
	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
	uvm_pctparam_init(&s->s_filemax, 50, NULL);
	uvm_pctparam_init(&s->s_execmax, 30, NULL);
}

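/*
 * uvmpdpol_reinit: policy re-initialization hook; nothing to do
 * for this policy.
 */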
void
uvmpdpol_reinit(void)
{
}

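/*
 * uvmpdpol_needsscan_p: return true if the inactive queue is below
 * its target and a scan is warranted.
 */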
bool
uvmpdpol_needsscan_p(void)
{

	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

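/*
 * uvmpdpol_tune: refresh the policy's computed targets; currently
 * this just recomputes the inactive-queue target.
 */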
void
uvmpdpol_tune(void)
{

	clock_tune();
}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

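/*
 * uvmpdpol_sysctlsetup: export the percentage knobs as sysctl nodes
 * (vm.anonmin, vm.filemax, vm.inactivepct and friends).
 */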
void
uvmpdpol_sysctlsetup(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached executable data"));

	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for "
	    "anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "executable data"));

	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
	    SYSCTL_DESCR("Percentage of inactive queue of "
	    "the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */