2463 lines
58 KiB
C
2463 lines
58 KiB
C
/**************************************************************************
|
|
* *
|
|
* Copyright (C) 1994-1999, Silicon Graphics, Inc. *
|
|
* *
|
|
* These coded instructions, statements, and computer programs contain *
|
|
* unpublished proprietary information of Silicon Graphics, Inc., and *
|
|
* are protected by Federal copyright law. They may not be disclosed *
|
|
* to third parties or copied or duplicated in any form, in whole or *
|
|
* in part, without the prior written consent of Silicon Graphics, Inc. *
|
|
* *
|
|
**************************************************************************/
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/immu.h>
|
|
#include <sys/cmn_err.h>
|
|
#include <sys/atomic_ops.h>
|
|
#include <sys/pfdat.h>
|
|
#include <sys/pda.h>
|
|
#include <sys/kmem.h>
|
|
#include <sys/systm.h> /* splhi */
|
|
#include <ksys/vproc.h>
|
|
#include <sys/debug.h>
|
|
#include <sys/sysmp.h> /* numainfo */
|
|
#include <sys/idbgentry.h>
|
|
#include <sys/nodepda.h>
|
|
#include <sys/numa.h>
|
|
#include <ksys/rmap.h>
|
|
#include "os/as/pmap.h"
|
|
|
|
#ifdef DEBUG_PFNLOCKS
|
|
#include <sys/proc.h>
|
|
#endif
|
|
|
|
#include <sys/ktrace.h>
|
|
#include <ksys/vnode_pcache.h>
|
|
|
|
/*
|
|
* Reverse Mapping layer (rmap)
|
|
* rmap layer would be responsible for creating and maintaining the reverse
|
|
* mapping from the pfdat structures to individual process page
|
|
* table entries (ptes)
|
|
*/
|
|
|
|
/*
|
|
* Data structures for reverse mapping.
|
|
*
|
|
* Reverse mapping involves keeping track of the mapping from the
|
|
* physical page to the page table entries corresponding to the virtual
|
|
* address of all the processes mapping this physical page.
|
|
*
|
|
* Each physical page is represented by a pfdat data structure, and
|
|
* each pfdat has a pointer which could either be used as a pointer
|
|
* to the page table entry, or to point to the reverse map structure
|
|
* which in turn keeps track of the list of pointers pointing to
|
|
* page table entries
|
|
*
|
|
* The reverse map structure is growable table (array) of pte pointers. The
|
|
* smallest table is the size of a cache line (128 bytes) and the largest size
|
|
* matches the system page size. The first entry (8 bytes) of the table or in
|
|
* the case of 32 bit systems, the first two entries are used to keep
|
|
* housekeeping information. If the number of ptes per page grows beyond this,
|
|
* reverse map structure becomes a two level table with the first level table
|
|
* containing pointers to the second level table. The second level table
|
|
* can have 32 entries. The maximum number of rmaps we can have in the system
|
|
* is limited by the size of the pf_use field of the pfdat which is a short.
|
|
* To begin with the entries in the table are linked into a singly linked list
|
|
* of free entries. The head of this list is kept in the rmap_freelist_head
|
|
* field of the rmap_t structure. When pte is added to the list, the first
|
|
* entry from free list is selected and used. The pte carries an rmap_index
|
|
* field where the index of the rmap entry is stored. This makes the deleting
|
|
* the pte entry from the rmap a simple operation. It is for this reason, the
|
|
* rmap structure was chosen to be a linear array. If the array is a two level
|
|
* array, the lower bits of the rmap index select the slot in the first level
|
|
* table, and the higher bits select the second level table. This enables us
|
|
* to not recalculate the indices of existing ptes in an rmap when a second
|
|
* level table is created.
|
|
* Each second level table carries its own free list. This helps free the
|
|
* table when all entries in it are free.
|
|
* The rmap structure is protected by its own lock. It resides in the least
|
|
* significant bit of the pf_pdep1 field of the rmap. The next bit in the
|
|
* field indicates if the pf_pdep2 field contains a pointer to a pte or
|
|
* a reverse map structure.
|
|
* The least significant bit of the first level table entry indicates if
|
|
* that entry contains a pointer to a second level table or a pte.
|
|
* The various arrays are allocated using zones. There is a zone for
|
|
* each array size. The rmap_t carries a zone index which indicates the
|
|
* zone from which that structure is allocated.
|
|
* For systems where the pte is not big enough to hold the rmap index,
|
|
* the array is sequentially searched to find the pte during the delete
|
|
* operation. This should be the case for low end systems.
|
|
*/
|
|
#define SUCCESS 0
|
|
#define FAILURE 1
|
|
|
|
/*
|
|
* Flag indicates that the first level entry is pointing to a second
|
|
* level table. This is the lowest significant bit of the first level entry
|
|
* which is guaranteed to be zero for a pointer of type
|
|
* rmap_second_level_table_t.
|
|
*/
|
|
#define SECOND_LEVEL_TABLE_FLAG 1
|
|
|
|
/*
|
|
* Starting index for first level table. We skip 0. It has housekeeping
|
|
* entries.
|
|
*/
|
|
|
|
#if _PAGESZ == 16384
|
|
#define MAX_RMAP_FIRST_LEVEL_TABLE_SIZE 2047
|
|
#define FIRST_LEVEL_INDEX_OFFSET 1
|
|
|
|
#elif _PAGESZ == 4096
|
|
|
|
#define MAX_RMAP_FIRST_LEVEL_TABLE_SIZE 1022
|
|
#define FIRST_LEVEL_INDEX_OFFSET 2
|
|
#else
|
|
#error "Unknown page size"
|
|
#endif
|
|
|
|
#define SECOND_LEVEL_TABLE_SIZE 32
|
|
|
|
/*
|
|
* Max number of ptes that can be added to the rmap of a page.
|
|
*/
|
|
#define MAX_NUM_RMAP_ENTRIES (MAX_RMAP_FIRST_LEVEL_TABLE_SIZE * \
|
|
SECOND_LEVEL_TABLE_SIZE)
|
|
|
|
/*
|
|
* Max. first level table index.
|
|
*/
|
|
#define MAX_RMAP_FIRST_LEVEL_INDEX (MAX_RMAP_FIRST_LEVEL_TABLE_SIZE + \
|
|
FIRST_LEVEL_INDEX_OFFSET)
|
|
|
|
|
|
/*
|
|
* Indicates that the entry is free and does not contain a pte or a
|
|
* second level table pointer.
|
|
*/
|
|
|
|
#define IS_FREE_ENTRY(table_ent) (table_ent->freelist_index \
|
|
< MAX_NUM_RMAP_ENTRIES)
|
|
/*
|
|
* Returns true if a first level entry points to a second level table.
|
|
*/
|
|
#define IS_SECOND_LEVEL(table_ent) \
|
|
((__psint_t)((table_ent)->second_level_table) \
|
|
& SECOND_LEVEL_TABLE_FLAG)
|
|
|
|
#define RMAPNULL (rmap_t *)0
|
|
|
|
/*
|
|
* Sets a flag in the first level entry to indicate that it points to
|
|
* second level table.
|
|
*/
|
|
#define SET_RMAP_SECOND_LEVEL_FLAG(table_ent) \
|
|
(((table_ent)->second_level_table) = \
|
|
((rmap_second_level_table_t *)\
|
|
((__psint_t)((table_ent)->second_level_table) | \
|
|
SECOND_LEVEL_TABLE_FLAG)))
|
|
|
|
#define CLR_RMAP_SECOND_LEVEL_FLAG(second_level_table) \
|
|
((second_level_table) = \
|
|
((rmap_second_level_table_t *)\
|
|
((__psint_t)(second_level_table) & \
|
|
(~SECOND_LEVEL_TABLE_FLAG))))
|
|
|
|
/*
|
|
* Get a pointer to the beginning of the first level table.
|
|
*/
|
|
#define GET_FIRST_LEVEL_TABLE(rmap) ((table_ent_t *)\
|
|
((caddr_t)(rmap) + sizeof(rmap_t)))
|
|
|
|
|
|
/*
|
|
* Get the second level table pointer given the first level entry. This
|
|
* just gets the pointer and clears the SECOND_LEVEL_TABLE_FLAG.
|
|
*/
|
|
#define GET_SECOND_LEVEL_TABLE(first_level_entry) \
|
|
((rmap_second_level_table_t *)((__psint_t)\
|
|
((first_level_entry)->second_level_table) & (~SECOND_LEVEL_TABLE_FLAG)))
|
|
|
|
/*
|
|
* Gets the second level table entry given first level entry and the second
|
|
* level table index.
|
|
*/
|
|
#define GET_SECOND_LEVEL_ENTRY(first_level_entry, indx) \
|
|
((GET_SECOND_LEVEL_TABLE(first_level_entry))->table_entries + (indx))
|
|
|
|
|
|
/*
|
|
* Get a pointer to the entry in the first level table for a given
|
|
* index.
|
|
*/
|
|
|
|
#define GET_FIRST_LEVEL_ENTRY(rmap, indx) (table_ent_t *)\
|
|
((table_ent_t *)(rmap) + (indx))
|
|
|
|
/*
|
|
* Given the rmap_index get the first level and second level indices.
|
|
*/
|
|
|
|
#define GET_FIRST_LEVEL_INDEX(rmap_index) \
|
|
((rmap_index) % MAX_RMAP_FIRST_LEVEL_INDEX)
|
|
|
|
#define GET_SECOND_LEVEL_INDEX(rmap_index) \
|
|
((rmap_index) / MAX_RMAP_FIRST_LEVEL_INDEX)
|
|
|
|
/*
|
|
* Compute the rmap index given the first and second level indices.
|
|
*/
|
|
#define RMAP_INDEX(first_level_index, second_level_index) \
|
|
((second_level_index) * \
|
|
MAX_RMAP_FIRST_LEVEL_INDEX + \
|
|
(first_level_index))
|
|
|
|
/*
|
|
* Gets the first level table size of a given rmap.
|
|
*/
|
|
#define FIRST_LEVEL_TABLE_SIZE(rmap) (rmap_table_sizes[(rmap)\
|
|
->rmap_zone_index])
|
|
|
|
|
|
typedef unsigned short table_index_t;
|
|
|
|
/*
|
|
* The first level table entry can either point to a pte or a second
|
|
* level table entry.
|
|
*/
|
|
|
|
typedef union {
|
|
pde_t *pdep;
|
|
struct rmap_second_level_table *second_level_table;
|
|
|
|
/*
|
|
* This is a ulong instead of table_index_t to match the size of the
|
|
* other entries.
|
|
* Assignmet to freelist_index should clear other bits in the field.
|
|
*/
|
|
|
|
ulong freelist_index;
|
|
} table_ent_t;
|
|
|
|
/*
|
|
* Second level table. It has a count and a free list head in addition
|
|
* to the table.
|
|
*/
|
|
typedef struct rmap_second_level_table {
|
|
cnt_t num_free_entries;
|
|
table_index_t freelist_head;
|
|
table_ent_t table_entries[SECOND_LEVEL_TABLE_SIZE];
|
|
} rmap_second_level_table_t;
|
|
|
|
typedef struct rmap {
|
|
table_index_t rmap_freelist_head; /* Head of free list */
|
|
table_index_t rmap_zone_index;
|
|
table_index_t rmap_freetable_hint;
|
|
pf_use_t num_ptes;
|
|
} rmap_t;
|
|
|
|
|
|
#if _PAGESZ == 16384
|
|
|
|
static int rmap_table_sizes[] = {
|
|
15, /* This + rmap_t size fits into a cache line (128 bytes) */
|
|
31, /* 256 bytes */
|
|
47, /* 384 bytes */
|
|
63, /* 512 bytes */
|
|
95, /* 768 bytes */
|
|
127, /* 1024 bytes */
|
|
191, /* 1536 bytes */
|
|
255, /* 2048 bytes */
|
|
511, /* 4096 bytes */
|
|
1023, /* 8192 bytes */
|
|
MAX_RMAP_FIRST_LEVEL_TABLE_SIZE, /* Fits exactly into a page */
|
|
};
|
|
|
|
#elif _PAGESZ == 4096
|
|
|
|
static int rmap_table_sizes[] = {
|
|
30, /* This + rmap_t size fits into a cache line */
|
|
62, /* 256 bytes */
|
|
94, /* 384 bytes */
|
|
126, /* 512 bytes */
|
|
254,
|
|
MAX_RMAP_FIRST_LEVEL_TABLE_SIZE, /* Fits exactly into a page */
|
|
};
|
|
|
|
#else
|
|
#error "Unknown page size"
|
|
#endif
|
|
|
|
#define NUM_RMAP_TABLE_SIZES (sizeof(rmap_table_sizes)/sizeof(int))
|
|
|
|
/*
|
|
* Max. zone index value.
|
|
*/
|
|
#define MAX_RMAP_ZONE_INDEX (NUM_RMAP_TABLE_SIZES - 1)
|
|
|
|
/* For scalability purpose, we would need one zone per node.
|
|
* In that case, rmap_init would be called once for each node?
|
|
* Perhaps there would be a per node data structure where this
|
|
* pointer could be attached.
|
|
*/
|
|
|
|
|
|
/*
|
|
* Zone for first level table.
|
|
*/
|
|
zone_t *rmap_zone[NUM_RMAP_TABLE_SIZES];
|
|
|
|
/*
|
|
* Zone for second level table.
|
|
*/
|
|
zone_t *second_level_table_zone;
|
|
|
|
/* Any static references go here */
|
|
static void rmap_init_freelist(table_ent_t *, table_index_t, table_index_t);
|
|
static int rmap_grow(pfd_t *, pde_t *);
|
|
static int rmap_second_level_table_add(pfd_t *, pde_t *);
|
|
static int rmap_grow_second_level_table(rmap_t *, pde_t *);
|
|
static int rmap_doop(uint, pde_t *, void *, void *);
|
|
static int rmap_lockpfns(pfd_t *);
|
|
static int rmap_mark_shotdown(pfd_t *);
|
|
static int rmap_verify_locks(pfd_t *, int);
|
|
#ifdef MH_R10000_SPECULATION_WAR
|
|
static int rmap_invalidate_uptbl_entry(pde_t *pde, void *counts);
|
|
#endif
|
|
|
|
|
|
/*
|
|
* Debugging and statistics.
|
|
*/
|
|
|
|
#ifdef RMAP_STATS
|
|
struct rmapinfo_s rmapinfo;
|
|
#endif
|
|
|
|
/* IP22 has a space crunch - debug kernel needs to fit in 7.25M. */
|
|
/* CELL_IRIX also has a space crunch too since it uses MAPPED_KERNEL */
|
|
#if defined(DEBUG) && !defined(IP22) && !defined(CELL_IRIX) && !defined(SN0XXL)
|
|
#define RMAP_DEBUG
|
|
#endif /* DEBUG */
|
|
|
|
#ifdef RMAP_DEBUG
|
|
/* Setup a circular list, to track the operation done on
|
|
* the pde/pfdat passed
|
|
*/
|
|
extern void idbg_rmapprint(pfn_t);
|
|
|
|
#define RMAP_DBGENTS 1024*16 /* Power of 2 */
|
|
#define RMAP_DBGMASK (RMAP_DBGENTS - 1)
|
|
|
|
struct rmap_dbgchain {
|
|
char rmap_op;
|
|
char rmap_fl;
|
|
short rmap_pid;
|
|
int rmap_tm;
|
|
pde_t *rmap_pdep;
|
|
pfd_t *rmap_pfdp;
|
|
void *rmap_ra;
|
|
} rmap_dbglist[RMAP_DBGENTS];
|
|
|
|
int rmap_dbgindx = 0;
|
|
extern time_t lbolt;
|
|
|
|
#define RMAP_ADDMAPLOG 1
|
|
#define RMAP_DELMAPLOG 2
|
|
#define RMAP_SWAPMAP 3
|
|
#define RMAP_SCANMAP 4
|
|
#define RMAP_ADDMAP_FIRST 5
|
|
#define RMAP_DELMAP_LAST 6
|
|
|
|
#define rflag(pf) (IS_LONG_RMAP(pf) ? 1 : 0)
|
|
#define RMAP_LOGENT(o,pf,pd,ra) rmap_dbglist[rmap_dbgindx].rmap_op = o; \
|
|
rmap_dbglist[rmap_dbgindx].rmap_fl= rflag(pf);\
|
|
rmap_dbglist[rmap_dbgindx].rmap_tm = lbolt; \
|
|
rmap_dbglist[rmap_dbgindx].rmap_pdep = pd; \
|
|
rmap_dbglist[rmap_dbgindx].rmap_pfdp = pf; \
|
|
rmap_dbglist[rmap_dbgindx].rmap_ra =(void *)ra;\
|
|
rmap_dbglist[rmap_dbgindx].rmap_pid = \
|
|
current_pid(); \
|
|
rmap_dbgindx = ++rmap_dbgindx & RMAP_DBGMASK;
|
|
|
|
#define RMAP_MAXCALLERS 16
|
|
#define RMAP_ADDMAPCLR 0
|
|
#define RMAP_DELMAPCLR 1
|
|
unsigned long rmap_callers[2][RMAP_MAXCALLERS];
|
|
|
|
static void rmap_logcallers(int, unsigned long);
|
|
|
|
#define RMAP_LOGCALLER(x, y) rmap_logcallers(x, (unsigned long)y)
|
|
|
|
#else /* !RMAP_DEBUG */
|
|
|
|
#define RMAP_LOGENT(x,y,z,p)
|
|
|
|
#define RMAP_ADDMAPCLR 0
|
|
#define RMAP_DELMAPCLR 1
|
|
#define RMAP_LOGCALLER(x, y)
|
|
|
|
#endif /* RMAP_DEBUG */
|
|
|
|
#ifdef DEBUG_PFNLOCKS
|
|
extern void rmap_verify_lock_consistency(pfd_t*);
|
|
#define RMAP_VERIFY_LOCK_CONSISTENCY(pfdp) rmap_verify_lock_consistency((pfdp))
|
|
#else /* ! DEBUG_PFNLOCKS */
|
|
#define RMAP_VERIFY_LOCK_CONSISTENCY(pfdp)
|
|
#endif /* ! DEBUG_PFNLOCKS */
|
|
|
|
|
|
|
|
/*
|
|
* rmap_init : Initialize rmap specific data structures
|
|
* Called from pmap_init
|
|
* Initialize all the zone for the different sizes.
|
|
*/
|
|
void
|
|
rmap_init(void)
|
|
{
|
|
|
|
int i;
|
|
|
|
for ( i = 0; i < NUM_RMAP_TABLE_SIZES; i++) {
|
|
rmap_zone[i] = kmem_zone_init(sizeof(rmap_t)
|
|
+ rmap_table_sizes[i] * sizeof(table_ent_t), "Rmap");
|
|
ASSERT(rmap_zone[i]);
|
|
}
|
|
second_level_table_zone = kmem_zone_init(sizeof(rmap_second_level_table_t),
|
|
"Rmap second level table");
|
|
|
|
#ifdef RMAP_DEBUG
|
|
idbg_addfunc("rmaplog", (void (*)())idbg_rmaplog);
|
|
idbg_addfunc("rmapp", (void (*)())idbg_rmapprint);
|
|
#endif
|
|
#ifdef RMAP_STATS
|
|
idbg_addfunc("rmapstat", (void (*)())idbg_rmap_stats);
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* Grow the first level table or add a second level table to the rmap.
|
|
* Called when no more free entries are left in the current rmap.
|
|
*/
|
|
static int
|
|
rmap_grow(pfd_t *pfdp, pde_t *pdep)
|
|
{
|
|
table_index_t new_zone_index, new_table_size,
|
|
cur_table_size;
|
|
table_ent_t *new_table, *cur_table;
|
|
rmap_t *new_rmap;
|
|
rmap_t *rmap = pfdp->pf_rmapp;
|
|
pde_t *old_pdep;
|
|
table_index_t start_index;
|
|
|
|
|
|
if (IS_LONG_RMAP(pfdp)) {
|
|
/*
|
|
* Nothing in the free list.
|
|
*/
|
|
|
|
ASSERT(rmap->rmap_freelist_head == 0);
|
|
|
|
if (rmap->rmap_zone_index >= MAX_RMAP_ZONE_INDEX) {
|
|
return rmap_grow_second_level_table(rmap, pdep);
|
|
} else {
|
|
new_zone_index = rmap->rmap_zone_index + 1;
|
|
|
|
new_table_size = rmap_table_sizes[new_zone_index];
|
|
}
|
|
|
|
cur_table = GET_FIRST_LEVEL_TABLE(rmap);
|
|
|
|
cur_table_size = rmap_table_sizes[
|
|
rmap->rmap_zone_index];
|
|
|
|
new_rmap = kmem_zone_zalloc(rmap_zone[new_zone_index],
|
|
KM_NOSLEEP);
|
|
if (new_rmap == NULL) return 0;
|
|
|
|
new_table = GET_FIRST_LEVEL_TABLE(new_rmap);
|
|
|
|
new_rmap->rmap_zone_index = new_zone_index;
|
|
|
|
/*
|
|
* Copy the old contents to the new list.
|
|
*/
|
|
|
|
bcopy(cur_table, new_table, cur_table_size*sizeof(table_ent_t));
|
|
|
|
/*
|
|
* Get to the start of the free entry.
|
|
*/
|
|
|
|
new_table += cur_table_size;
|
|
|
|
new_table->pdep = pdep;
|
|
|
|
start_index = cur_table_size + FIRST_LEVEL_INDEX_OFFSET;
|
|
|
|
pg_set_rmap_index(pdep, start_index);
|
|
|
|
new_table++;
|
|
new_rmap->num_ptes = rmap->num_ptes + 1;
|
|
start_index++;
|
|
|
|
/*
|
|
* Initialize the freelist.
|
|
*/
|
|
|
|
|
|
new_rmap->rmap_freelist_head = start_index;
|
|
|
|
|
|
rmap_init_freelist(new_table, start_index,
|
|
new_table_size - cur_table_size - 1);
|
|
/*
|
|
* Free the old table.
|
|
*/
|
|
|
|
pfdp->pf_rmapp = new_rmap;
|
|
|
|
kmem_zone_free(rmap_zone[new_zone_index - 1], rmap);
|
|
|
|
} else {
|
|
|
|
new_table_size = rmap_table_sizes[0];
|
|
/*
|
|
* By allocating one chunk we get the whole data structure
|
|
* in one cache line.
|
|
*/
|
|
|
|
new_rmap = kmem_zone_zalloc(rmap_zone[0], KM_NOSLEEP);
|
|
|
|
if (new_rmap == NULL) return 0;
|
|
|
|
new_rmap->rmap_zone_index = 0;
|
|
|
|
new_table = GET_FIRST_LEVEL_TABLE(new_rmap);
|
|
|
|
start_index = FIRST_LEVEL_INDEX_OFFSET;
|
|
|
|
old_pdep = pfdp->pf_pdep2;
|
|
|
|
new_table->pdep = old_pdep;
|
|
pg_set_rmap_index(old_pdep, start_index);
|
|
new_table++;
|
|
start_index++;
|
|
|
|
new_table->pdep = pdep;
|
|
|
|
pg_set_rmap_index(pdep, start_index);
|
|
new_table++;
|
|
start_index++;
|
|
|
|
/*
|
|
* Set the rmap in the pfdat structure.
|
|
*/
|
|
|
|
pfdp->pf_rmapp = new_rmap;
|
|
|
|
SET_LONG_RMAP(pfdp);
|
|
|
|
/*
|
|
* Initialize the freelist.
|
|
*/
|
|
|
|
new_rmap->num_ptes = 2;
|
|
|
|
new_rmap->rmap_freelist_head = start_index;
|
|
|
|
rmap_init_freelist(new_table, start_index, new_table_size
|
|
- start_index + FIRST_LEVEL_INDEX_OFFSET);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
/*
|
|
* Initialize the rmap freelist.
|
|
*/
|
|
static void
|
|
rmap_init_freelist(table_ent_t *table, table_index_t start_index,
|
|
table_index_t table_length)
|
|
{
|
|
table_ent_t *table_end;
|
|
|
|
table_end = table + table_length - 1;
|
|
start_index++;
|
|
for (;table < table_end; table++)
|
|
table->freelist_index = start_index++;
|
|
|
|
table->freelist_index = 0;
|
|
}
|
|
|
|
static int
|
|
rmap_grow_second_level_table(rmap_t *rmap, pde_t *pdep)
|
|
{
|
|
table_ent_t *first_level_entry;
|
|
pde_t *tmp_pdep;
|
|
rmap_second_level_table_t *second_level_table;
|
|
table_index_t first_level_index,
|
|
second_level_index;
|
|
table_ent_t *second_level_entry;
|
|
|
|
ASSERT(rmap->rmap_zone_index == MAX_RMAP_ZONE_INDEX);
|
|
|
|
|
|
if (rmap->num_ptes == MAX_NUM_RMAP_ENTRIES) {
|
|
cmn_err(CE_PANIC,"Trying to grow rmaps beyond size %d\n",
|
|
MAX_NUM_RMAP_ENTRIES);
|
|
/* NOTREACHED */
|
|
}
|
|
|
|
first_level_entry = GET_FIRST_LEVEL_TABLE(rmap);
|
|
|
|
/*
|
|
* Nothing in the free list.
|
|
*/
|
|
|
|
ASSERT(rmap->rmap_freelist_head == 0);
|
|
|
|
for (first_level_index = FIRST_LEVEL_INDEX_OFFSET;
|
|
first_level_index < MAX_RMAP_FIRST_LEVEL_INDEX;
|
|
first_level_index++, first_level_entry++) {
|
|
|
|
if (!IS_SECOND_LEVEL(first_level_entry)) {
|
|
|
|
second_level_index = 0;
|
|
|
|
tmp_pdep = first_level_entry->pdep;
|
|
|
|
second_level_table = kmem_zone_zalloc(
|
|
second_level_table_zone, KM_NOSLEEP);
|
|
|
|
if (second_level_table == NULL) return 0;
|
|
|
|
second_level_entry =
|
|
second_level_table->table_entries;
|
|
|
|
first_level_entry->second_level_table =
|
|
second_level_table;
|
|
|
|
/*
|
|
* Add the two ptes.
|
|
* One was in the first_level_entry and the
|
|
* other is the one for which the addmap was
|
|
* invoked.
|
|
*/
|
|
|
|
second_level_entry->pdep = tmp_pdep;
|
|
second_level_entry++;
|
|
|
|
pg_set_rmap_index(tmp_pdep,
|
|
RMAP_INDEX(first_level_index,
|
|
second_level_index));
|
|
|
|
second_level_index++;
|
|
second_level_entry->pdep = pdep;
|
|
second_level_entry++;
|
|
pg_set_rmap_index(pdep,
|
|
RMAP_INDEX(first_level_index,
|
|
second_level_index));
|
|
|
|
second_level_index++;
|
|
|
|
second_level_table->freelist_head = second_level_index;
|
|
|
|
SET_RMAP_SECOND_LEVEL_FLAG(first_level_entry);
|
|
|
|
/*
|
|
* Update count field.
|
|
*/
|
|
second_level_table->num_free_entries =
|
|
SECOND_LEVEL_TABLE_SIZE -
|
|
second_level_index;
|
|
|
|
rmap->rmap_freetable_hint = first_level_index;
|
|
|
|
rmap_init_freelist(second_level_entry,
|
|
second_level_index,
|
|
SECOND_LEVEL_TABLE_SIZE
|
|
- second_level_index);
|
|
break;
|
|
}
|
|
}
|
|
|
|
rmap->num_ptes++;
|
|
|
|
if ( first_level_index == MAX_RMAP_FIRST_LEVEL_INDEX)
|
|
cmn_err(CE_PANIC, "Ran out of rmap entries\n");
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* rmap_addmap:
|
|
* Add a reverse mapping to the given pfdat.
|
|
* pfdp points to the pfdat mapping the physical page, and
|
|
* pdep points to the page table entry which would be
|
|
* mapping to physical page represented by pfdp
|
|
*
|
|
* If either of the two pde pointers in pfdat is free, use them to
|
|
* point to pdep.
|
|
*
|
|
* If there is an rmap structure, then check if there is a place for
|
|
* attaching the new pointer, and do so if possible. Otherwise,
|
|
* allocate a new rmap_t structure, and setup the pointers appropriately.
|
|
*
|
|
* Return 0 to indicate succesful insertion, 1 to indicate the
|
|
* caller should go sxbrk due to memory shortage, and retry operation.
|
|
*/
|
|
/* ARGSUSED */
|
|
int
|
|
rmap_addmap_nolock(pfd_t *pfdp, pde_t *pdep, struct pm* pm)
|
|
{
|
|
rmap_t *pf_rmap = 0;
|
|
table_index_t free_entry_index;
|
|
table_ent_t *rmap_free_entry;
|
|
|
|
ASSERT(pfdp);
|
|
ASSERT(pdep);
|
|
/*
|
|
* It would be nice to assert that the pte has a pfn that
|
|
* corresponds to the pfdat, but that is not true if the
|
|
* call comes out of VPAG_UPDATE_RMAP_ADDMAP.
|
|
*/
|
|
|
|
ASSERT(pfdp->pf_use); /* There should be >= 1 user of this page!! */
|
|
|
|
RMAP_DOSTAT(rmapadd);
|
|
|
|
|
|
RMAP_LOGENT(RMAP_ADDMAPLOG, pfdp, pdep, __return_address);
|
|
RMAP_LOGCALLER(RMAP_ADDMAPCLR, __return_address);
|
|
|
|
|
|
/* If there is place in the pfdat for this pde use it */
|
|
if (GET_RMAP_PDEP1(pfdp) == PDENULL){
|
|
ASSERT(pdep != pfdp->pf_pdep2);
|
|
SET_RMAP_PDEP1(pfdp, pdep);
|
|
|
|
if (pfdp->pf_pdep2 == 0) {
|
|
/*
|
|
* This is the first pde in the map,
|
|
* start the migration for the page.
|
|
*/
|
|
|
|
migr_start(pfdattopfn(pfdp), pm);
|
|
RMAP_LOGENT(RMAP_ADDMAP_FIRST, pfdp, pdep, __return_address);
|
|
}
|
|
return 0;
|
|
}
|
|
else if (pfdp->pf_pdep2 == PDENULL) {
|
|
ASSERT(GET_RMAP_PDEP1(pfdp) != NULL);
|
|
ASSERT(pdep != GET_RMAP_PDEP1(pfdp));
|
|
pfdp->pf_pdep2 = pdep;
|
|
|
|
return 0;
|
|
}
|
|
else if (IS_LONG_RMAP(pfdp)) {
|
|
pf_rmap = pfdp->pf_rmapp;
|
|
free_entry_index = pf_rmap->rmap_freelist_head;
|
|
if (free_entry_index) {
|
|
RMAP_DOSTAT(rmapladd);
|
|
rmap_free_entry =
|
|
GET_FIRST_LEVEL_ENTRY(pf_rmap,
|
|
free_entry_index);
|
|
pf_rmap->rmap_freelist_head = (table_index_t)
|
|
rmap_free_entry->freelist_index;
|
|
rmap_free_entry->pdep = pdep;
|
|
pg_set_rmap_index(pdep, free_entry_index);
|
|
pf_rmap->num_ptes++;
|
|
|
|
return 0;
|
|
} else if (pf_rmap->rmap_zone_index
|
|
== MAX_RMAP_ZONE_INDEX) {
|
|
|
|
if (rmap_second_level_table_add(pfdp, pdep)) {
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Looks like the current rmap structure does not have
|
|
* sufficient memory to drop in a new pde entry. So, we
|
|
* need to grow the rmap structure, and use that.
|
|
*/
|
|
|
|
if (rmap_grow(pfdp, pdep)) {
|
|
RMAP_DOSTAT(rmapladd);
|
|
RMAP_DOSTAT(rmapinuse);
|
|
return 0;
|
|
} else {
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
void
|
|
rmap_addmap(pfd_t *pfdp, pde_t *pdep, struct pm* pm)
|
|
{
|
|
int s = RMAP_LOCK(pfdp);
|
|
|
|
while (rmap_addmap_nolock(pfdp, pdep, pm)) {
|
|
RMAP_UNLOCK(pfdp, s);
|
|
setsxbrk();
|
|
s = RMAP_LOCK(pfdp);
|
|
}
|
|
RMAP_UNLOCK(pfdp, s);
|
|
}
|
|
|
|
|
|
/*
|
|
* Try and find a place for the pdep in the second level table.
|
|
*/
|
|
|
|
static int
|
|
rmap_second_level_table_add(pfd_t *pfdp, pde_t *pdep)
|
|
{
|
|
rmap_t *pf_rmap;
|
|
|
|
table_ent_t *rmap_free_entry,
|
|
*first_level_entry;
|
|
/* REFERENCED */
|
|
table_index_t free_entry_index,
|
|
first_level_index,
|
|
second_level_index;
|
|
table_index_t i;
|
|
|
|
rmap_second_level_table_t *second_level_table;
|
|
|
|
ASSERT(IS_LONG_RMAP(pfdp));
|
|
|
|
|
|
pf_rmap = pfdp->pf_rmapp;
|
|
|
|
/* Search for a second level table entry */
|
|
|
|
first_level_index = pf_rmap->rmap_freetable_hint;
|
|
|
|
/*
|
|
* Hint is NULL. Try to grow second level table.
|
|
*/
|
|
if (first_level_index == 0) return 0;
|
|
|
|
first_level_entry = GET_FIRST_LEVEL_ENTRY(pf_rmap, first_level_index);
|
|
|
|
for ( i = FIRST_LEVEL_INDEX_OFFSET;
|
|
i < MAX_RMAP_FIRST_LEVEL_INDEX; i++) {
|
|
|
|
second_level_table = GET_SECOND_LEVEL_TABLE(first_level_entry);
|
|
|
|
if (IS_SECOND_LEVEL(first_level_entry)
|
|
&& (second_level_table->num_free_entries)) {
|
|
|
|
|
|
second_level_index =
|
|
second_level_table->freelist_head;
|
|
|
|
rmap_free_entry =
|
|
second_level_table->table_entries +
|
|
second_level_index;
|
|
|
|
second_level_table->freelist_head = (table_index_t)
|
|
rmap_free_entry->freelist_index;
|
|
second_level_table->num_free_entries--;
|
|
|
|
pf_rmap->rmap_freetable_hint = first_level_index;
|
|
|
|
free_entry_index =
|
|
RMAP_INDEX(first_level_index,
|
|
second_level_index);
|
|
|
|
rmap_free_entry->pdep = pdep;
|
|
pg_set_rmap_index(pdep, free_entry_index);
|
|
pf_rmap->num_ptes++;
|
|
|
|
return 1;
|
|
}
|
|
|
|
first_level_index++;
|
|
first_level_entry++;
|
|
|
|
if (first_level_index == MAX_RMAP_FIRST_LEVEL_INDEX) {
|
|
first_level_index = FIRST_LEVEL_INDEX_OFFSET;
|
|
first_level_entry = GET_FIRST_LEVEL_ENTRY(pf_rmap,
|
|
first_level_index);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
#ifdef RMAP_PTE_INDEX
|
|
|
|
/*
|
|
* Delete pde entry from the reverse map
|
|
*
|
|
* If the pde to be deleted is the last pde in the rmap, we will
|
|
* disable the migration interrupt for the page since it is no longer
|
|
* associated with any other address space. And, logically, we will
|
|
* shut down the migration activities associated with the page.
|
|
*/
|
|
|
|
void
|
|
rmap_delmap_nolock(pfd_t *pfdp, pde_t *pdep)
|
|
{
|
|
rmap_t *pf_rmap;
|
|
table_index_t rmap_index;
|
|
table_index_t first_level_index;
|
|
table_index_t second_level_index;
|
|
rmap_second_level_table_t *second_level_table;
|
|
table_ent_t *second_level_entry;
|
|
table_ent_t *first_level_entry;
|
|
|
|
ASSERT((pfdp && pdep && (pfdattopfn(pfdp) == pg_getpfn(pdep))) ||
|
|
pg_isshotdn(pdep));
|
|
|
|
RMAP_DOSTAT(rmapdel);
|
|
|
|
if (pg_isshotdn(pdep)) {
|
|
/* We are racing with a process that is shooting down
|
|
* replicated pages. pdep seems to be one of those ptes
|
|
* that was mapping a page which got tossed out.
|
|
* Since process shooting down the pages has taken care
|
|
* of disassociating this process from the page, we just
|
|
* need to return without doing anything.
|
|
*
|
|
* Since either the pfn is locked or the pfdat is held,
|
|
* we dont really need to hold the memory_lock to check
|
|
* if pdep has been shot down.
|
|
*/
|
|
return;
|
|
}
|
|
|
|
RMAP_LOGENT(RMAP_DELMAPLOG, pfdp, pdep, __return_address);
|
|
RMAP_LOGCALLER(RMAP_DELMAPCLR, __return_address);
|
|
|
|
if (GET_RMAP_PDEP1(pfdp) == pdep) {
|
|
SET_RMAP_PDEP1(pfdp, 0);
|
|
|
|
if (pfdp->pf_pdep2 == 0) {
|
|
|
|
ASSERT(pfdp->pf_rmapp == 0);
|
|
|
|
/*
|
|
* This pde is the last one in the map,
|
|
* stop the migration for this page frame
|
|
*/
|
|
migr_stop(pfdattopfn(pfdp));
|
|
RMAP_LOGENT(RMAP_DELMAP_LAST, pfdp, pdep,
|
|
__return_address);
|
|
} else {
|
|
ASSERT(RMAP_ISLOCKED(pfdp));
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
if (IS_LONG_RMAP(pfdp) == 0) {
|
|
|
|
if (pfdp->pf_pdep2 == pdep) {
|
|
pfdp->pf_pdep2 = 0;
|
|
|
|
if (GET_RMAP_PDEP1(pfdp) == 0) {
|
|
/*
|
|
* This pde is the last one in the map,
|
|
* stop the migration for this page frame
|
|
*/
|
|
migr_stop(pfdattopfn(pfdp));
|
|
RMAP_LOGENT(RMAP_DELMAP_LAST, pfdp, pdep,
|
|
__return_address);
|
|
}
|
|
}
|
|
else {
|
|
cmn_err(CE_PANIC,
|
|
"rmap_del: pde 0x%x not in pfdat 0x%x",
|
|
pdep, pfdp);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
RMAP_DOSTAT(rmapldel);
|
|
|
|
pf_rmap = pfdp->pf_rmapp;
|
|
|
|
rmap_index = (table_index_t)pg_get_rmap_index(pdep);
|
|
|
|
first_level_index = GET_FIRST_LEVEL_INDEX(rmap_index);
|
|
|
|
ASSERT(first_level_index);
|
|
ASSERT(first_level_index < (FIRST_LEVEL_TABLE_SIZE(pf_rmap)
|
|
+ FIRST_LEVEL_INDEX_OFFSET));
|
|
|
|
first_level_entry = GET_FIRST_LEVEL_ENTRY(pf_rmap, first_level_index);
|
|
|
|
if (IS_SECOND_LEVEL(first_level_entry)) {
|
|
|
|
second_level_index = GET_SECOND_LEVEL_INDEX(rmap_index);
|
|
|
|
ASSERT (second_level_index < SECOND_LEVEL_TABLE_SIZE);
|
|
|
|
second_level_table = GET_SECOND_LEVEL_TABLE(first_level_entry);
|
|
|
|
second_level_entry = second_level_table->table_entries
|
|
+ second_level_index;
|
|
|
|
ASSERT(pdep == second_level_entry->pdep);
|
|
|
|
second_level_table->num_free_entries++;
|
|
|
|
second_level_entry->freelist_index =
|
|
second_level_table->freelist_head;
|
|
second_level_table->freelist_head = second_level_index;
|
|
|
|
/*
|
|
* Lets see if its time to free the table.
|
|
*/
|
|
if (second_level_table->num_free_entries ==
|
|
SECOND_LEVEL_TABLE_SIZE) {
|
|
kmem_zone_free(second_level_table_zone,
|
|
second_level_table);
|
|
|
|
first_level_entry->freelist_index =
|
|
pf_rmap->rmap_freelist_head;
|
|
pf_rmap->rmap_freelist_head = first_level_index;
|
|
}
|
|
|
|
pf_rmap->num_ptes--;
|
|
|
|
if (pf_rmap->num_ptes == 0) {
|
|
CLR_LONG_RMAP(pfdp);
|
|
pfdp->pf_rmapp = RMAPNULL;
|
|
if (GET_RMAP_PDEP1(pfdp) == 0) {
|
|
/*
|
|
* This deleted pde is the last one
|
|
* in the map, stop the migration
|
|
* activiti for this page frame
|
|
*/
|
|
migr_stop(pfdattopfn(pfdp));
|
|
RMAP_LOGENT(RMAP_DELMAP_LAST, pfdp, pdep,
|
|
__return_address);
|
|
}
|
|
/*
|
|
* Free the first level table.
|
|
*/
|
|
kmem_zone_free(
|
|
rmap_zone[pf_rmap->rmap_zone_index], pf_rmap);
|
|
|
|
return;
|
|
}
|
|
|
|
return;
|
|
} else {
|
|
ASSERT(pdep == first_level_entry->pdep);
|
|
ASSERT(rmap_index == first_level_index);
|
|
|
|
first_level_entry->freelist_index = pf_rmap->rmap_freelist_head;
|
|
pf_rmap->rmap_freelist_head = first_level_index;
|
|
pf_rmap->num_ptes--;
|
|
|
|
if (pf_rmap->num_ptes == 0) {
|
|
|
|
/*
|
|
* Free the first level table.
|
|
*/
|
|
CLR_LONG_RMAP(pfdp);
|
|
pfdp->pf_rmapp = RMAPNULL;
|
|
if (GET_RMAP_PDEP1(pfdp) == 0) {
|
|
/*
|
|
* This deleted pde is the last one
|
|
* in the map, stop the migration
|
|
* activiti for this page frame
|
|
*/
|
|
migr_stop(pfdattopfn(pfdp));
|
|
RMAP_LOGENT(RMAP_DELMAP_LAST, pfdp, pdep,
|
|
__return_address);
|
|
}
|
|
/*
|
|
* Free the first level table.
|
|
*/
|
|
kmem_zone_free(
|
|
rmap_zone[pf_rmap->rmap_zone_index], pf_rmap);
|
|
|
|
return;
|
|
}
|
|
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*
 * rmap_swapmap:
 * Search for the place where opde exists, and replace it with npde.
 * At this time this is called only from seg_commonsplit, which is
 * trying to separate the shared and private areas for sproc processes,
 * and from replace_pmap, which is trying to replace the pmap for
 * a process which is switching ABIs.
 *
 * RMAP_PTE_INDEX version: the pte index stored in opde is used to
 * locate the entry directly, so no table scan is needed.
 */
void
rmap_swapmap(pfd_t *pfdp, pde_t *opde, pde_t *npde )
{

	rmap_t	*pf_rmap;
	int	s;
	table_index_t	rmap_index, first_level_index, second_level_index;
	table_ent_t	*first_level_entry, *second_level_entry;

	RMAP_DOSTAT(rmapswap);

	RMAP_LOGENT(RMAP_SWAPMAP, pfdp, opde, __return_address);

	s = RMAP_LOCK(pfdp);

	/* Common case 1: opde is the first inline pde slot in the pfdat. */
	if (GET_RMAP_PDEP1(pfdp) == opde) {
		SET_RMAP_PDEP1(pfdp, npde);
		RMAP_UNLOCK(pfdp, s);
		return;
	}

	/* Common case 2: short rmap, opde must be the second inline slot. */
	if (!(IS_LONG_RMAP(pfdp))) {
		ASSERT(pfdp->pf_pdep2 == opde); /* it better be true */
		pfdp->pf_pdep2 = npde;
		RMAP_UNLOCK(pfdp, s);
		return;
	}

	/* This pfdat has one or more users. */
	ASSERT(pfdp->pf_use >= 1);

	/* Long rmap: the index stored in opde locates its table entry. */
	rmap_index = (table_index_t)pg_get_rmap_index(opde);

	first_level_index = GET_FIRST_LEVEL_INDEX(rmap_index);

	pf_rmap = pfdp->pf_rmapp;

	ASSERT(first_level_index < (FIRST_LEVEL_TABLE_SIZE(pf_rmap)
					+ FIRST_LEVEL_INDEX_OFFSET));

	first_level_entry = GET_FIRST_LEVEL_ENTRY(pf_rmap, first_level_index);

	if (IS_SECOND_LEVEL(first_level_entry)) {

		second_level_index = GET_SECOND_LEVEL_INDEX(rmap_index);

		ASSERT (second_level_index < SECOND_LEVEL_TABLE_SIZE);

		second_level_entry = GET_SECOND_LEVEL_ENTRY(first_level_entry,
						second_level_index);

		ASSERT(opde == second_level_entry->pdep);

		/*
		 * Swap the pde in place and move the rmap index from the
		 * old pde to the new one so future lookups find it.
		 */
		second_level_entry->pdep = npde;
		pg_set_rmap_index(npde, rmap_index);
		pg_set_rmap_index(opde, 0);

		RMAP_UNLOCK(pfdp, s);
		return;
	} else {
		ASSERT(opde == first_level_entry->pdep);
		ASSERT(rmap_index == first_level_index);

		/*
		 * Direct first-level entry: swap the pde in place and
		 * transfer the rmap index to the new pde.
		 */

		first_level_entry->pdep = npde;
		pg_set_rmap_index(npde, rmap_index);
		pg_set_rmap_index(opde, 0);

		RMAP_UNLOCK(pfdp, s);
		return;
	}

	/*NOTREACHED*/
}
|
|
|
|
#else /* RMAP_PTE_INDEX */
|
|
/*
 * Version of rmap_delmap with no support for pte indices.
 * Needed by 32 bit systems.
 */

/*
 * rmap_delmap_nolock:
 * Remove the reverse mapping from pfdp to pdep. The caller must hold
 * the rmap lock for pfdp (see rmap_delmap for the locked wrapper).
 * Since there is no pte index, the entry is found by a linear scan of
 * the inline slots and (for long rmaps) of the two-level table.
 * Panics if pdep is not found in the map.
 */
void
rmap_delmap_nolock(pfd_t *pfdp, pde_t *pdep)
{
	rmap_t	*pf_rmap;
	int	i, j;
	table_index_t first_level_index;
	rmap_second_level_table_t *second_level_table;
	table_ent_t	*second_level_entry;
	table_ent_t	*first_level_entry;

	ASSERT((pfdp && pdep && (pfdattopfn(pfdp) == pg_getpfn(pdep))) ||
			pg_isshotdn(pdep));

	RMAP_DOSTAT(rmapdel);

	if (pg_isshotdn(pdep)) {
		/* We are racing with a process that is shooting down
		 * replicated pages. pdep seems to be one of those ptes
		 * that was mapping a page which got tossed out.
		 * Since process shooting down the pages has taken care
		 * of disassociating this process from the page, we just
		 * need to return without doing anything.
		 *
		 * Since either the pfn is locked or the pfdat is held,
		 * we dont really need to hold the memory_lock to check
		 * if pdep has been shot down.
		 */
		return;
	}

	RMAP_LOGENT(RMAP_DELMAPLOG, pfdp, pdep, __return_address);
	RMAP_LOGCALLER(RMAP_DELMAPCLR, __return_address);

	/* Case 1: pdep is the first inline slot. */
	if (GET_RMAP_PDEP1(pfdp) == pdep) {
		SET_RMAP_PDEP1(pfdp, 0);

		if (pfdp->pf_pdep2 == 0) {

			ASSERT(pfdp->pf_rmapp == 0);

			/*
			 * This pde is the last one in the map,
			 * stop the migration for this page frame
			 */
			migr_stop(pfdattopfn(pfdp));
			RMAP_LOGENT(RMAP_DELMAP_LAST, pfdp, pdep,
					__return_address);
		}
		return;
	}


	/* Case 2: short rmap, pdep must be the second inline slot. */
	if (IS_LONG_RMAP(pfdp) == 0){

		if (pfdp->pf_pdep2 == pdep) {
			pfdp->pf_pdep2 = 0;

			if (GET_RMAP_PDEP1(pfdp) == 0) {
				/*
				 * This pde is the last one in the map,
				 * stop the migration for this page frame
				 */
				migr_stop(pfdattopfn(pfdp));
				RMAP_LOGENT(RMAP_DELMAP_LAST, pfdp, pdep,
						__return_address);
			}
		}
		else {
			cmn_err(CE_PANIC,
				"rmap_del: pde 0x%x not in pfdat 0x%x",
				pdep, pfdp);
		}

		return;
	}

	/* Case 3: long rmap, scan the two-level table. */
	ASSERT(IS_LONG_RMAP(pfdp));

	RMAP_DOSTAT(rmapldel);

	pf_rmap = pfdp->pf_rmapp;

	first_level_index = FIRST_LEVEL_INDEX_OFFSET;
	first_level_entry = GET_FIRST_LEVEL_TABLE(pf_rmap);

	for (i = 0; i < FIRST_LEVEL_TABLE_SIZE(pf_rmap); i++,
			first_level_index++, first_level_entry++) {

		if (IS_FREE_ENTRY(first_level_entry)) continue;

		if (!IS_SECOND_LEVEL(first_level_entry)) {
			if (first_level_entry->pdep == pdep) {
				/*
				 * Push the entry onto the first-level
				 * freelist. The freelist index overlays
				 * pdep in the same storage, so this write
				 * also invalidates the pde pointer.
				 */
				first_level_entry->freelist_index =
					pf_rmap->rmap_freelist_head;
				pf_rmap->rmap_freelist_head = i +
					FIRST_LEVEL_INDEX_OFFSET;
				pf_rmap->num_ptes--;
				if (pf_rmap->num_ptes == 0) {
					/*
					 * Free the first level table.
					 */
					kmem_zone_free(rmap_zone
						[pf_rmap->rmap_zone_index],
						pf_rmap);
					CLR_LONG_RMAP(pfdp);
					pfdp->pf_rmapp = RMAPNULL;

					if (GET_RMAP_PDEP1(pfdp) == 0) {
						/*
						 * This deleted pde is the last one
						 * in the map, stop the migration
						 * activity for this page frame
						 */
						migr_stop(pfdattopfn(pfdp));
						RMAP_LOGENT(RMAP_DELMAP_LAST,
							pfdp, pdep,
							__return_address);
					}
				}
				return;
			}
			continue;
		}

		second_level_table = GET_SECOND_LEVEL_TABLE(first_level_entry);

		second_level_entry = second_level_table->table_entries;

		for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
				second_level_entry++) {
			if (IS_FREE_ENTRY(second_level_entry))
				continue;
			if (second_level_entry->pdep == pdep) {
				/* Push onto the second-level freelist. */
				second_level_entry->freelist_index =
					second_level_table->freelist_head;
				second_level_table->freelist_head = j;
				second_level_table->num_free_entries++;

				/*
				 * If the second-level table is now all
				 * free, release it and put its first
				 * level slot back on the freelist.
				 */
				if (second_level_table->num_free_entries ==
						SECOND_LEVEL_TABLE_SIZE) {

					kmem_zone_free(second_level_table_zone,
						second_level_table);

					first_level_entry->freelist_index =
						pf_rmap->rmap_freelist_head;
					pf_rmap->rmap_freelist_head =
						first_level_index;
				}

				pf_rmap->num_ptes--;

				if (pf_rmap->num_ptes == 0) {
					/*
					 * Free the first level table.
					 */
					kmem_zone_free(rmap_zone
						[pf_rmap->rmap_zone_index],
						pf_rmap);
					CLR_LONG_RMAP(pfdp);
					pfdp->pf_rmapp = RMAPNULL;
					if (GET_RMAP_PDEP1(pfdp) == 0) {
						/*
						 * This deleted pde is the last one
						 * in the map, stop the migration
						 * activity for this page frame
						 */
						migr_stop(pfdattopfn(pfdp));
						RMAP_LOGENT(RMAP_DELMAP_LAST,
							pfdp, pdep,
							__return_address);
					}
				}
				return;
			}

		}
	}
	cmn_err(CE_PANIC, "Rmap delmap: pde not in rmap pfdp 0x%x pdep 0x%x \n",
		pfdp, pdep);
	return;
}
|
|
|
|
/*
|
|
* rmap_swapmap:
|
|
* Search for the place where opde exists, and replace it with npde.
|
|
* At this time this's called only from seg_commonsplit, which is
|
|
* trying to separate the shared and private areas for sproc processes.
|
|
* 32 bit pte version.
|
|
*/
|
|
void
|
|
rmap_swapmap(pfd_t *pfdp, pde_t *opde, pde_t *npde )
|
|
{
|
|
|
|
rmap_t *pf_rmap;
|
|
int s,i, j;
|
|
table_ent_t *first_level_entry, *second_level_entry;
|
|
rmap_second_level_table_t *second_level_table;
|
|
|
|
RMAP_DOSTAT(rmapswap);
|
|
|
|
RMAP_LOGENT(RMAP_SWAPMAP, pfdp, opde, __return_address);
|
|
|
|
s = RMAP_LOCK(pfdp);
|
|
|
|
if (GET_RMAP_PDEP1(pfdp) == opde) {
|
|
SET_RMAP_PDEP1(pfdp, npde);
|
|
RMAP_UNLOCK(pfdp, s);
|
|
return;
|
|
}
|
|
|
|
if (!(IS_LONG_RMAP(pfdp))) {
|
|
ASSERT(pfdp->pf_pdep2 == opde); /* it better be true */
|
|
pfdp->pf_pdep2 = npde;
|
|
RMAP_UNLOCK(pfdp, s);
|
|
return;
|
|
}
|
|
|
|
/* This pfdat has one or more users. */
|
|
ASSERT(pfdp->pf_use >= 1);
|
|
|
|
pf_rmap = pfdp->pf_rmapp;
|
|
|
|
first_level_entry = GET_FIRST_LEVEL_TABLE(pf_rmap);
|
|
|
|
for (i = 0; i < FIRST_LEVEL_TABLE_SIZE(pf_rmap); i++,
|
|
first_level_entry++) {
|
|
|
|
if (IS_FREE_ENTRY(first_level_entry)) continue;
|
|
|
|
if (!IS_SECOND_LEVEL(first_level_entry)) {
|
|
if (first_level_entry->pdep == opde) {
|
|
|
|
first_level_entry->pdep = npde;
|
|
RMAP_UNLOCK(pfdp, s);
|
|
return;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
second_level_table = GET_SECOND_LEVEL_TABLE(first_level_entry);
|
|
|
|
second_level_entry = second_level_table->table_entries;
|
|
|
|
for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
|
|
second_level_entry++) {
|
|
if (IS_FREE_ENTRY(second_level_entry))
|
|
continue;
|
|
if (second_level_entry->pdep == opde) {
|
|
second_level_entry->freelist_index =
|
|
second_level_table->freelist_head;
|
|
second_level_entry->pdep = npde;
|
|
RMAP_UNLOCK(pfdp, s);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
cmn_err(CE_PANIC, "rmap_swap: opde 0x%x not in rmap\n", opde);
|
|
/*NOTREACHED*/
|
|
}
|
|
#endif /* RMAP_PTE_INDEX */
|
|
|
|
void
|
|
rmap_delmap(pfd_t *pfdp, pde_t *pdep)
|
|
{
|
|
int s = RMAP_LOCK(pfdp);
|
|
|
|
rmap_delmap_nolock(pfdp, pdep);
|
|
RMAP_UNLOCK(pfdp, s);
|
|
}
|
|
|
|
/*
|
|
* rmap_scanmap:
|
|
* Scan through the pdes mapping to a particular pfdat, and
|
|
* do the required operation on those pdes.
|
|
*
|
|
* LOCKING:
|
|
* This method does NOT do any locking. It is up to the
|
|
* caller to do the appropriate locking. At least
|
|
* the rmap lock for the corresponding page must be taken.
|
|
*/
|
|
|
|
int
|
|
rmap_scanmap(pfd_t *pfdp, uint_t op, void *datap)
|
|
{
|
|
|
|
int retval = 0, counter = 0;
|
|
table_index_t i, j;
|
|
table_ent_t *first_level_entry, *second_level_entry;
|
|
rmap_t *pf_rmap;
|
|
|
|
ASSERT(pfdp);
|
|
|
|
RMAP_LOGENT(RMAP_SCANMAP, pfdp, (pde_t *)((long)op), __return_address);
|
|
|
|
#ifdef RMAP_SCANTEST
|
|
/* If in scantest mode, Dont do anything unless it's anon page */
|
|
if (!(pfdp->pf_flags & (P_ANON|P_SQUEUE|P_DQUEUE|P_DIRTY|P_DONE))){
|
|
return;
|
|
}
|
|
#endif /* RMAP_SCANTEST */
|
|
|
|
RMAP_DOSTAT(rmapscan);
|
|
ASSERT(RMAP_ISLOCKED(pfdp));
|
|
|
|
/*
|
|
* It's possible to have an outer for loop, and a switch statement
|
|
* within the for loop for the required operation. But that would
|
|
* be less efficient in executing the required operation, than
|
|
* having a for loop in each case statement.
|
|
* Hence the replicated code in each case statement.
|
|
*/
|
|
switch(op) {
|
|
case RMAP_LOCKPFN:
|
|
retval = rmap_lockpfns(pfdp);
|
|
break;
|
|
|
|
case RMAP_VERIFYLOCKS:
|
|
retval = rmap_verify_locks(pfdp, (int)(__uint64_t)datap);
|
|
break;
|
|
|
|
case RMAP_SHOOTPFN:
|
|
retval = rmap_mark_shotdown(pfdp);
|
|
break;
|
|
|
|
case RMAP_COUNTLINKS:
|
|
|
|
if (GET_RMAP_PDEP1(pfdp))
|
|
retval++;
|
|
|
|
if (!IS_LONG_RMAP(pfdp)) {
|
|
if (pfdp->pf_pdep2)
|
|
retval++;
|
|
break;
|
|
}
|
|
|
|
retval += pfdp->pf_rmapp->num_ptes;
|
|
|
|
break;
|
|
|
|
case RMAP_CLRVALID:
|
|
case RMAP_SETVALID:
|
|
case RMAP_ZEROPDE:
|
|
case RMAP_SETPFN:
|
|
case RMAP_UNLOCKPFN:
|
|
case RMAP_SETPFN_AND_UNLOCK:
|
|
case RMAP_CHECK_LPAGE:
|
|
case RMAP_JOBRSS_TWO:
|
|
case RMAP_JOBRSS_ANY:
|
|
case RMAP_MIGR_CHECK:
|
|
#ifdef MH_R10000_SPECULATION_WAR
|
|
case RMAP_MH_SPECULATION_WAR:
|
|
#endif
|
|
|
|
if (GET_RMAP_PDEP1(pfdp))
|
|
retval = rmap_doop(op, GET_RMAP_PDEP1(pfdp), datap,
|
|
(void *)&counter);
|
|
|
|
if (retval)
|
|
return retval;
|
|
|
|
if (!(IS_LONG_RMAP(pfdp))) {
|
|
if (pfdp->pf_pdep2)
|
|
retval = rmap_doop(op, pfdp->pf_pdep2, datap,
|
|
(void *)&counter);
|
|
return retval;
|
|
}
|
|
|
|
pf_rmap = pfdp->pf_rmapp;
|
|
ASSERT(pf_rmap);
|
|
|
|
first_level_entry = GET_FIRST_LEVEL_TABLE(pf_rmap);
|
|
for (i = 0; i < FIRST_LEVEL_TABLE_SIZE(pf_rmap); i++,
|
|
first_level_entry++) {
|
|
|
|
if (IS_FREE_ENTRY(first_level_entry)) continue;
|
|
|
|
if (!IS_SECOND_LEVEL(first_level_entry)) {
|
|
retval = rmap_doop(op, first_level_entry->pdep,
|
|
datap, (void *)&counter);
|
|
if (retval)
|
|
return retval;
|
|
|
|
continue;
|
|
}
|
|
|
|
second_level_entry =
|
|
GET_SECOND_LEVEL_ENTRY(first_level_entry, 0);
|
|
for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
|
|
second_level_entry++) {
|
|
if (IS_FREE_ENTRY(second_level_entry))
|
|
continue;
|
|
retval = rmap_doop(op, second_level_entry->pdep,
|
|
datap, (void *)&counter);
|
|
if (retval)
|
|
return retval;
|
|
}
|
|
}
|
|
break;
|
|
|
|
|
|
default:
|
|
cmn_err(CE_PANIC,"Invalid option %d to rmap_scanmap",op);
|
|
break;
|
|
/*NOTREACHED*/
|
|
|
|
}
|
|
|
|
return (retval);
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* Returns 0 on success, non zero on failure.
|
|
*/
|
|
|
|
int
|
|
rmap_doop(uint op, pde_t *pdep, void *datap, void *counter)
|
|
{
|
|
int retval = 0;
|
|
/* REFERENCED */
|
|
long rmap_index;
|
|
|
|
switch(op) {
|
|
|
|
case RMAP_CLRVALID:
|
|
pg_clrvalid(pdep);
|
|
break;
|
|
|
|
case RMAP_SETVALID:
|
|
pg_setsftval(pdep);
|
|
break;
|
|
|
|
case RMAP_ZEROPDE:
|
|
pg_clrpgi(pdep);
|
|
break;
|
|
|
|
case RMAP_SETPFN:
|
|
pg_setpfn(pdep, *(pfn_t *)datap);
|
|
break;
|
|
|
|
case RMAP_UNLOCKPFN:
|
|
pg_pfnrelease(pdep);
|
|
break;
|
|
|
|
case RMAP_SETPFN_AND_UNLOCK:
|
|
rmap_index = pg_get_rmap_index(pdep);
|
|
pg_setpfn(pdep, *(pfn_t *)datap);
|
|
pg_pfnrelease(pdep);
|
|
ASSERT(rmap_index == pg_get_rmap_index(pdep));
|
|
break;
|
|
|
|
case RMAP_CHECK_LPAGE:
|
|
retval = (int)pg_get_page_mask_index(pdep);
|
|
break;
|
|
|
|
#ifdef MH_R10000_SPECULATION_WAR
|
|
case RMAP_MH_SPECULATION_WAR:
|
|
rmap_invalidate_uptbl_entry(pdep, datap);
|
|
break;
|
|
#endif
|
|
case RMAP_JOBRSS_ANY:
|
|
retval = pmap_pte_scan(pdep, miser_jobcount, datap, 0,
|
|
JOB_SCAN);
|
|
break;
|
|
case RMAP_JOBRSS_TWO:
|
|
retval = pmap_pte_scan(pdep, miser_jobcount, datap, counter,
|
|
JOB_SCAN);
|
|
break;
|
|
case RMAP_MIGR_CHECK:
|
|
retval = pg_isfetchop(pdep);
|
|
break;
|
|
default:
|
|
cmn_err(CE_PANIC,"Invalid option %d to rmap_scanmap",op);
|
|
break;
|
|
/*NOTREACHED*/
|
|
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
#ifdef MH_R10000_SPECULATION_WAR
/*
 * rmap_invalidate_uptbl_entry:
 * Part of the R10000 speculative-store workaround. For a valid,
 * cacheable pde: propagate the hardware modified bit to the pfdat
 * (P_DIRTY) when the pfn is backed by a pfdat, clear the hardware
 * valid bit, and bump the caller's counters.
 * counts[0] counts pdes examined; counts[1] counts hardware-valid
 * bits cleared. Always returns 0.
 */
static int
rmap_invalidate_uptbl_entry(pde_t *pde, void *counts)
{
	pfd_t *pfd;
	/* BUGFIX: declare an explicit int return (implicit int is pre-C99). */
	extern int is_in_pfdat(pgno_t pfn);

	if (! pg_isvalid(pde) ||
	    pg_isnoncache(pde))
		return 0;

	if (pg_ismod(pde) &&
	    is_in_pfdat(pdetopfn(pde))) {
		pfd = pdetopfdat(pde);
		pfd->pf_flags |= P_DIRTY;
	}
	if (pg_ishrdvalid(pde)) {
		pg_clrhrdvalid(pde);
		((int *)counts)[1]++;
	}
	((int *)counts)[0]++;
	return 0;
}
#endif
|
|
|
|
/*
 * Lock all pfns.
 * Returns :
 *	-1 if unable to lock pdep
 *	-2 if unable to lock pdep2
 *	-3 if unable to lock a pfn in any of the reverse maps.
 *	number of locked pfns if success.
 *
 * On any failure every lock acquired so far is released before
 * returning, so the caller never sees a partially-locked map.
 */
static int
rmap_lockpfns(pfd_t *pfdp)
{

	int retval = 0;
	rmap_t *pf_rmap;
	int lock_failure = 0;
	table_index_t i, j;
	table_ent_t *first_level_entry, *second_level_entry;
	pde_t *pdep1;

	RMAP_VERIFY_LOCK_CONSISTENCY(pfdp);

	pdep1 = GET_RMAP_PDEP1(pfdp);

	/* First inline slot: conditional acquire, fail with -1. */
	if (pdep1) {
		if (pg_pfncondacq(pdep1)) {
			RMAP_VERIFY_LOCK_CONSISTENCY(pfdp);
			return -1;
		}
		retval++;
	}

	/* Short rmap: only the second inline slot remains. */
	if (!(IS_LONG_RMAP(pfdp))) {
		if (pfdp->pf_pdep2) {
			if (pg_pfncondacq(pfdp->pf_pdep2)) {
				/*
				 * We need to release the locks we've
				 * already acquired.
				 */
				if (pdep1)
					pg_pfnrelease(pdep1);
				RMAP_VERIFY_LOCK_CONSISTENCY(pfdp);
				return -2;
			}
			retval++;
		}
		return retval;
	}


	/* Long rmap: lock every pde in the two-level table. */
	pf_rmap = pfdp->pf_rmapp;

	ASSERT(pf_rmap);

	first_level_entry = GET_FIRST_LEVEL_TABLE(pf_rmap);

	for (i = 0; i < FIRST_LEVEL_TABLE_SIZE(pf_rmap); i++,
			first_level_entry++) {
		if (IS_FREE_ENTRY(first_level_entry)) continue;

		if (!IS_SECOND_LEVEL(first_level_entry)) {


			if (!(pg_pfncondacq(first_level_entry->pdep))) {
				/* Got the lock, keep going */
				retval++;
				continue;
			}

			/* Screeeeeeeeeeeeech, could not get a lock.
			 * need to unroll everything done so far.
			 */

			lock_failure++;
			break;
		}

		/*
		 * Need to look at second level table.
		 */
		second_level_entry =
			GET_SECOND_LEVEL_ENTRY(first_level_entry, 0);

		for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
				second_level_entry++) {

			if (IS_FREE_ENTRY(second_level_entry))
				continue;

			if (!(pg_pfncondacq(second_level_entry->pdep))){
				/* Got the lock, keep going */
				retval++;
				continue;
			}

			/* Screeeeeeeeeeeeech, could not get a lock.
			 * need to unroll everything done so far.
			 */

			/*
			 * Unlock all the second level entries for this
			 * table here. We will break out of the loop
			 * and unlock the rest of the ptes.
			 * (Entries after j in this table were never
			 * locked; the unwind below skips index i.)
			 */
			second_level_entry--;
			for (; j > 0; j--, second_level_entry--) {
				if (IS_FREE_ENTRY(second_level_entry))
					continue;
				pg_pfnrelease(second_level_entry->pdep);
			}

			lock_failure++;
			break;
		}

		if (lock_failure)
			break;
	}

	/*
	 * If there are no lock failures return success.
	 */
	if (!lock_failure ) return retval;

	/*
	 * First we unlock the first direct
	 * pte.
	 */
	if(pdep1)
		pg_pfnrelease(pdep1);

	/*
	 * Unlock all the first level entries.
	 * i is the index of the entry that failed; walking backwards
	 * with i-- starts at entry i-1, correctly skipping the failed
	 * (not-fully-locked) entry.
	 */

	while (i--) {
		first_level_entry--;
		if (IS_FREE_ENTRY(first_level_entry)) continue;

		if (!IS_SECOND_LEVEL(first_level_entry)) {
			pg_pfnrelease(first_level_entry->pdep);
			continue;
		}

		second_level_entry =
			GET_SECOND_LEVEL_ENTRY(first_level_entry, 0);

		for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
				second_level_entry++) {

			if (!IS_FREE_ENTRY(second_level_entry))
				pg_pfnrelease(second_level_entry->pdep);
		}

	}

	RMAP_VERIFY_LOCK_CONSISTENCY(pfdp);
	return -3;
}
|
|
|
|
|
|
/*
 * rmap_verify_locks:
 * Debug check: count pdes in this pfdat's reverse map whose
 * (pte_numa_home & 0xFFFF) matches process_id. Used by the
 * DEBUG_PFNLOCKS machinery to detect pfn locks the current process
 * already holds. Compiled to a no-op (always 0) unless both
 * PTE_64BIT and NUMA_BASE are defined.
 * NOTE(review): the low 16 bits of pte_numa_home appear to encode a
 * pid when the pfn lock is held in debug builds — confirm against
 * the pte definition.
 */
/*ARGSUSED*/
static int
rmap_verify_locks(pfd_t *pfdp, int process_id)
{
	int error_counter = 0;
#if defined(PTE_64BIT) && defined(NUMA_BASE)
	rmap_t *pf_rmap;
	table_index_t i, j;
	table_ent_t *first_level_entry, *second_level_entry;
	pde_t	*pdep1;


	/* Check the two inline slots first. */
	pdep1 = GET_RMAP_PDEP1(pfdp);
	if (pdep1) {
		if ((pdep1->pte.pte_numa_home & 0xFFFF) == process_id) {
			error_counter++;
		}
	}

	if (!(IS_LONG_RMAP(pfdp))) {
		if ((pfdp->pf_pdep2) &&
		((pfdp->pf_pdep2->pte.pte_numa_home & 0xFFFF) == process_id)) {
			error_counter++;
		}
		return error_counter;
	}

	/* Long rmap: walk the whole two-level table. */
	pf_rmap = pfdp->pf_rmapp;
	ASSERT(pf_rmap);

	first_level_entry = GET_FIRST_LEVEL_TABLE(pf_rmap);

	for (i = 0; i < FIRST_LEVEL_TABLE_SIZE(pf_rmap); i++,
			first_level_entry++) {
		if (IS_FREE_ENTRY(first_level_entry))
			continue;
		if (!IS_SECOND_LEVEL(first_level_entry)) {
			ASSERT(first_level_entry->pdep);
			if ((first_level_entry->pdep->pte.pte_numa_home
					& 0xFFFF) == process_id) {
				error_counter++;
			}
			continue;
		}

		second_level_entry =
			GET_SECOND_LEVEL_ENTRY(first_level_entry, 0);

		for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
				second_level_entry++) {
			if (!IS_FREE_ENTRY(second_level_entry)) {
				ASSERT(second_level_entry->pdep);
				if ((second_level_entry->pdep->pte.pte_numa_home
					& 0xFFFF) == process_id) {
					error_counter++;
				}
			}
		}
	}
#endif /* PTE_64BIT */

	return (error_counter);
}
|
|
|
|
|
|
/*
 * rmap_mark_shotdown:
 * Need to do all the things in a pde to
 * indicate that this has been shotdown.
 * Assumes that the pde has been locked
 *
 * For every pde mapping this page: clear the valid bit, zero the
 * pfn, set the shot-down marker, and release the pfn lock.
 * Returns the number of pdes processed.
 *
 * SIDE EFFECT
 * This function has the side effect of unlocking the pfns
 * and freeing up the reverse maps associated with the page
 * This has been specifically to make the page shoot down
 * to be completed with minimal lock acquire/free cycles
 */
static int
rmap_mark_shotdown(pfd_t *pfdp)
{
	rmap_t *pf_rmap;
	int rmap_cnt = 0;
	table_index_t i, j;
	table_ent_t *first_level_entry, *second_level_entry;
	pde_t *pdep1 = GET_RMAP_PDEP1(pfdp);

	/* First inline slot. */
	if (pdep1){
		/* Make it a single atomic OP ? */
		pg_clrvalid(pdep1);
		pg_setpfn(pdep1, 0);
		pg_setshotdn(pdep1);
		pg_pfnrelease(pdep1);
		rmap_cnt++;
		SET_RMAP_PDEP1(pfdp, 0);
	}

	/* Short rmap: handle the second inline slot and we're done. */
	if (!(IS_LONG_RMAP(pfdp))) {
		if (pfdp->pf_pdep2){
			/* Make it a single atomic OP ? */
			pg_clrvalid(pfdp->pf_pdep2);
			pg_setpfn(pfdp->pf_pdep2, 0);
			pg_setshotdn(pfdp->pf_pdep2);
			pg_pfnrelease(pfdp->pf_pdep2);
			rmap_cnt++;
			pfdp->pf_pdep2 = 0;
		}
		migr_stop(pfdattopfn(pfdp));
		return rmap_cnt;
	}

	/*
	 * Long rmap: detach the table from the pfdat first, then shoot
	 * down every entry and free the tables as we go.
	 */
	pf_rmap = pfdp->pf_rmapp;
	ASSERT(pf_rmap);
	pfdp->pf_rmapp = 0;
	CLR_LONG_RMAP(pfdp);

	first_level_entry = GET_FIRST_LEVEL_TABLE(pf_rmap);

	for (i = 0; i < FIRST_LEVEL_TABLE_SIZE(pf_rmap); i++,
			first_level_entry++) {
		if (IS_FREE_ENTRY(first_level_entry))
			continue;
		if (!IS_SECOND_LEVEL(first_level_entry)) {
			/* Make it a single atomic OP ? */
			pg_clrvalid(first_level_entry->pdep);
			pg_setpfn(first_level_entry->pdep, 0);
			pg_setshotdn(first_level_entry->pdep);
			pg_pfnrelease(first_level_entry->pdep);
			rmap_cnt++;
			continue;
		}

		second_level_entry =
			GET_SECOND_LEVEL_ENTRY(first_level_entry, 0);

		for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
				second_level_entry++) {
			if (!IS_FREE_ENTRY(second_level_entry)) {
				/* Make it a single atomic OP ? */
				pg_clrvalid(second_level_entry->pdep);
				pg_setpfn(second_level_entry->pdep, 0);
				pg_setshotdn(second_level_entry->pdep);
				pg_pfnrelease(second_level_entry->pdep);
				rmap_cnt++;
			}
		}

		/*
		 * Free the second level table.
		 */
		kmem_zone_free(second_level_table_zone,
			GET_SECOND_LEVEL_TABLE(first_level_entry));
	}

	/*
	 * Free the first level table.
	 */
	kmem_zone_free(rmap_zone[pf_rmap->rmap_zone_index], pf_rmap);

	if (rmap_cnt != 0) {
		/* Stop the migration activity for the page */
		migr_stop(pfdattopfn(pfdp));
		RMAP_LOGENT(RMAP_DELMAP_LAST, pfdp, (pde_t *)NULL,
				__return_address);
	}

	return rmap_cnt;
}
|
|
|
|
|
|
|
|
/*
|
|
* rmap_xfer:
|
|
* Transfer all reverse mapping state from old pfdat to new pfdat.
|
|
* This includes, moving the reverse mapping pointer if any to new
|
|
* pfdat, as well as any other flags/state that may be in the old pfdat.
|
|
*
|
|
* Since rmap_xfer is called when the page migration is destined to succeed,
|
|
* the migration interrupt is turned off for the old pfd and turned on
|
|
* for the new pfd to keep the notion of enabling migration interrupt
|
|
* consistent with rmap operations.
|
|
*
|
|
* We'd better enable and disable the migration interrupt for one page
|
|
* while we're having the lock for rmap. Thus, we can avoid some possible
|
|
* races. One scenario is: after releasing the rmap lock and before
|
|
* enabling the migration interrupt for the new page, one mapping is gone
|
|
* and one assertion fails.
|
|
*
|
|
* TBD: Scheme for locking the two pfdats.
|
|
* Transferring other pfdat fields (e.g. tag, count, flags etc) needs to
|
|
* be done elsewhere.
|
|
*/
|
|
void
|
|
rmap_xfer(pfd_t *opfd, pfd_t *npfd)
|
|
{
|
|
|
|
/*
|
|
* XXX
|
|
* It may be necessary to lock the two pfdats here. One possible
|
|
* scheme to avoid dead-locks is to lock the pfdat with lower addr
|
|
* first, and then the higher addressed one.
|
|
* For now, we assume mem_lock is taken by the caller.
|
|
*/
|
|
|
|
/* Update new pde */
|
|
npfd->pf_rmapp = opfd->pf_rmapp;
|
|
SET_RMAP_PDEP1(npfd, GET_RMAP_PDEP1(opfd));
|
|
if IS_LONG_RMAP(opfd)
|
|
SET_LONG_RMAP(npfd);
|
|
|
|
/* Zero out old pde fields */
|
|
opfd->pf_rmapp = 0;
|
|
SET_RMAP_PDEP1(opfd, 0);
|
|
CLR_LONG_RMAP(opfd);
|
|
|
|
}
|
|
|
|
/*
 * Returns true if a page is valid or it is migrating.
 * We serialize the testing using the pfdat_lock() as it is held before
 * clearing the P_ISMIGRATING bit in migr_page_epilogue().
 */
int
pg_ismigrating(pde_t *pd)
{
	pfd_t *pfd;
	int s;
	pfn_t pfn;

	/*
	 * YUCK ALERT!! : dont use
	 *          pdetopfdat(pd)
	 *
	 * here. The way the macros are currently constructed, this
	 * may cause pdetopfn to be evaluated twice. If migration
	 * is in the process of changing the pd.pfn, we may get a bogus
	 * pfd if the first evaluation sees the old pfn & the second
	 * evaluation see the new pfn. (Macros should be changed to fix
	 * this, but now is not the time to do it!!).
	 *
	 * Note: problem exists only if you compile at low optimizations.
	 *
	 */
	/* Read the pfn exactly once, then derive the pfdat from the copy. */
	pfn = pdetopfn(pd);
	pfd = (pfn > PG_SENTRY) ? pfntopfdat(pfn) : (pfd_t *)0;

	/* No backing pfdat: fall back to the pde valid bit alone. */
	if (!pfd) return pg_isvalid(pd);

	/* Cheap unlocked check first; recheck under the lock below. */
	if (pfdat_ismigrating(pfd)) return TRUE;

	s = pfdat_lock(pfd);

	KTRACE_ENTER(vnode_ktrace, VNODE_PG_ISMIGRATING, pfd->pf_tag, pg_isvalid(pd), pfd->pf_flags);
	if (pg_isvalid(pd) || pfdat_ismigrating(pfd)) {
		pfdat_unlock(pfd, s);
		return TRUE;
	}
	pfdat_unlock(pfd, s);
	return FALSE;
}
|
|
|
|
#ifdef DEBUG_PFNLOCKS
|
|
/*
|
|
* Debugging support for pde-pfnlocks
|
|
*/
|
|
|
|
/*
|
|
* The following locking routines are intended to be used
|
|
* ONLY for debugging migration when migration is done via
|
|
* a syssgi syscall. Obviously these routines do NOT work
|
|
* within intr handlers.
|
|
*/
|
|
void
|
|
pg_pfnacquire(pde_t* pde)
|
|
{
|
|
ASSERT(pde);
|
|
|
|
/*****
|
|
printf("ACQUIRING: PFNLOCK FROM 0x%llx, pte_pfnlock: %d, pfn: 0x%x\n",
|
|
pde, pde->pte.pte_pfnlock, pde->pte.pte_pfn);
|
|
*****/
|
|
|
|
PG_LOCK((pde)->pgi, PG_PFNLOCK);
|
|
}
|
|
|
|
void
|
|
pg_pfnrelease(pde_t* pde)
|
|
{
|
|
ASSERT(pde);
|
|
PG_UNLOCK((pde)->pgi, PG_PFNLOCK);
|
|
}
|
|
|
|
int
|
|
pg_pfncondacq(pde_t* pde)
|
|
{
|
|
if (PG_TRYLOCK((pde)->pgi, PG_PFNLOCK) == 0) {
|
|
/* success */
|
|
return (0);
|
|
} else {
|
|
return (1);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void
|
|
rmap_verify_lock_consistency(pfd_t *pfdp)
|
|
{
|
|
rmap_t* irmap;
|
|
int i;
|
|
|
|
#ifdef R10000
|
|
return;
|
|
#else /* !R10000 */
|
|
ASSERT(curvprocp);
|
|
rmap_verify_locks(pfdp, current_pid());
|
|
#endif /* !R10000 */
|
|
}
|
|
|
|
#endif /* DEBUG_PFNLOCKS */
|
|
|
|
|
|
|
|
#ifdef RMAP_DEBUG
|
|
|
|
/*
|
|
* Print the rmaps for a given pfdat
|
|
*/
|
|
|
|
#define	DUMP_PFDAT	0x1

/*
 * idbg_rmapprint:
 * Kernel debugger helper: dump every reverse-map entry for the page
 * frame pfn, flagging any entry whose pde does not point back at pfn.
 */
void
idbg_rmapprint(pfn_t pfn)
{
	pfd_t *pfdp = pfntopfdat(pfn);
	rmap_t *pf_rmap;
	table_index_t i, j;
	table_ent_t *first_level_entry, *second_level_entry;
	pde_t	*pdep1;

	/* First inline slot. */
	pdep1 = GET_RMAP_PDEP1(pfdp);
	if (pdep1) {
		qprintf("[RMAP1] 0x%x ", pdep1);
		idbg_dopde(pdep1, 0, DUMP_PFDAT);
		if (pfn != pdep1->pte.pg_pfn) {
			qprintf("*** Previous RMAP is not consistent\n");
		}
	}

	/* Short rmap: second inline slot is the only other mapping. */
	if (!(IS_LONG_RMAP(pfdp))) {
		if (pfdp->pf_pdep2) {
			qprintf("[RMAP2] 0x%x ", pfdp->pf_pdep2);
			idbg_dopde(pfdp->pf_pdep2, 0, DUMP_PFDAT);
			if (pfn != pfdp->pf_pdep2->pte.pg_pfn) {
				qprintf("*** Previous RMAP is not consistent\n");
			}
		}
		return;
	}

	pf_rmap = pfdp->pf_rmapp;
	ASSERT(pf_rmap);

	qprintf("Printing Rmap info at 0x%x \n", pf_rmap);
	qprintf("First level table size %d num ptes %d\n",
			FIRST_LEVEL_TABLE_SIZE(pf_rmap),
			pf_rmap->num_ptes);
	qprintf("Freelist head %d free table hint %d\n",
			pf_rmap->rmap_freelist_head,
			pf_rmap->rmap_freetable_hint);

	first_level_entry = GET_FIRST_LEVEL_TABLE(pf_rmap);

	for (i = 0; i < FIRST_LEVEL_TABLE_SIZE(pf_rmap); i++,
			first_level_entry++) {

		if (IS_FREE_ENTRY(first_level_entry)) {
			qprintf("[RMAPI] free entry 0x%x\n",
				first_level_entry->freelist_index);
			continue;
		}

		if (!IS_SECOND_LEVEL(first_level_entry)) {
			qprintf("[RMAPI] 0x%x ", first_level_entry->pdep);
			idbg_dopde(first_level_entry->pdep, 0, DUMP_PFDAT);
			if (pfn != first_level_entry->pdep->pte.pg_pfn) {
				qprintf("*** Previous RMAP is not consistent\n");
			}
			continue;
		}

		second_level_entry =
			GET_SECOND_LEVEL_ENTRY(first_level_entry, 0);
		for (j = 0; j < SECOND_LEVEL_TABLE_SIZE; j++,
				second_level_entry++) {
			if (!IS_FREE_ENTRY(second_level_entry)) {
				qprintf("[RMAPII] 0x%x ",
					second_level_entry->pdep);
				idbg_dopde(second_level_entry->pdep,0,DUMP_PFDAT);
				if (pfn !=
					second_level_entry->pdep->pte.pg_pfn) {
					qprintf("*** Previous RMAP is not consistent\n");
				}
			} else
				qprintf("[RMAPII] second level free entry 0x%x\n",
					second_level_entry->freelist_index);
		}
	}

}
|
|
|
|
/*
 * Printable names for rmap log operations, indexed by the low 4 bits
 * of the logged rmap_op value (see idbg_rmaplog below).
 */
char *rmap_type[] = {
	"",
	"Add ",
	"Del ",
	"Swap",
	"Scan",
	"Add_First ",
	"Del_Last  ",
	""
};
|
|
|
|
/*
 * Print the Rmap log entries which correspond to the given entry.
 * rmap_data may be either a pde pointer or a pfdat pointer; every
 * circular-log entry matching either field is printed. Always
 * returns 0.
 */
int
idbg_rmaplog(void *rmap_data)
{
	int i;
	qprintf("Printing rmaplog for data 0x%x\n", rmap_data);
	/* Walk the ring once, starting at the oldest entry. */
	i = rmap_dbgindx;
	do {
		if (rmap_dbglist[i].rmap_pdep == (pde_t *)rmap_data ||
		    rmap_dbglist[i].rmap_pfdp == (pfd_t *)rmap_data )

			qprintf("%s@%x rmap %d pid %d pd %x pf %x ra %x\n",
				rmap_type[rmap_dbglist[i].rmap_op & 0xF],
				rmap_dbglist[i].rmap_tm,
				rmap_dbglist[i].rmap_fl,
				rmap_dbglist[i].rmap_pid,
				rmap_dbglist[i].rmap_pdep,
				rmap_dbglist[i].rmap_pfdp,
				rmap_dbglist[i].rmap_ra);

		if (++i == RMAP_DBGENTS)
			i = 0;
	} while (i != rmap_dbgindx);
	return 0;
}
|
|
|
|
extern int numnodes;
|
|
|
|
#include <sys/nodepda.h>
|
|
|
|
/* Number of pfdats examined per rmap_scantest() invocation (tunable). */
int rmap_toscan = 128;

/*
 * rmap_scantest:
 * RMAP_SCANTEST exerciser: sweep up to rmap_toscan page frames,
 * resuming where the previous call stopped (node and pfd1 are
 * static), and apply RMAP_CLRVALID to every in-use anonymous page
 * that has a reverse map attached. Wraps around the nodes.
 */
void
rmap_scantest()
{
	static cnodeid_t node = 0;
	static pfd_t *pfd1 = 0;
	int s;

	int pfdstoscan;
	pfd_t *pfd2;

	if (pfd1 == 0)
		pfd1 = PFD_LOW(node);

	pfdstoscan = rmap_toscan;

	while (pfdstoscan) {

		pfd2 = PFD_HIGH(node);

		/* pfdstoscan is charged per pfdat examined, hit or miss. */
		for (; pfdstoscan && (pfd1 <= pfd2); pfd1++, pfdstoscan--) {

			if ((pfd1->pf_use == 0) || (pfd1->pf_flags & P_QUEUE))
				/* Page not in use, or on free list */
				continue;

			/* Dont do anything unless it's anon page */
			if (!(pfd1->pf_flags & P_ANON))
				continue;

			if (!pfd1->pf_pdep2 && !pfd1->pf_pdep1)
				/* No reverse map attached */
				continue;

			s = RMAP_LOCK(pfd1);
			if (rmap_scanmap(pfd1, RMAP_CLRVALID, 0) != 0) {
				ASSERT(0);
			}
			RMAP_UNLOCK(pfd1, s);

		}

		if (!pfdstoscan)
			break;

		/* Exhausted this node's pfdats; advance (with wrap). */
		if (++node == numnodes){
			node = 0;
		}
		pfd1 = PFD_LOW(node);

	}

}
|
|
|
|
/*
 * rmap_logcallers:
 * Record caller_addr in the per-type table of unique rmap callers.
 * A full scan is always needed to detect duplicates; as a side
 * effect, freeent ends up holding the LAST free slot seen. Warns
 * (and drops the address) when the table for this type is full.
 */
static void
rmap_logcallers(int type, unsigned long caller_addr)
{
	int i, freeent = RMAP_MAXCALLERS;

	for (i=0; i < RMAP_MAXCALLERS; i++){
		if (!rmap_callers[type][i])
			freeent = i;
		if (rmap_callers[type][i] == caller_addr)
			return;		/* already recorded */
	}

	if (freeent == RMAP_MAXCALLERS){
		cmn_err_tag(290,CE_WARN,"No free slot for address 0x%x type: %d\n",
			caller_addr, type);
		return;
	}
	rmap_callers[type][freeent] = caller_addr;
}
|
|
|
|
#endif /* RMAP_DEBUG */
|
|
|
|
|
|
#ifdef RMAP_STATS
|
|
|
|
/* Size of the per-pfdat statistics array (covers node 0's pfdats only). */
#define MAX_PFDATS 8192

/*
 * Per-pfdat rmap length statistics, indexed by (pfd - PFD_LOW(0)).
 * Maintained by pfdat_add()/pfdat_del() under RMAP_STATS.
 */
struct rmap_pfdat_stat {
	int	maxlen;		/* high-water mark of mappings seen */
	int	len;		/* current number of mappings */
} rmap_pfdat_stat_buf[MAX_PFDATS];
|
|
|
|
|
|
|
|
struct vnode;
|
|
int vnode_max_len;
|
|
struct vnode *max_vnode;
|
|
|
|
void
|
|
pfdat_add(pfd_t *pfd)
|
|
{
|
|
struct rmap_pfdat_stat *p;
|
|
|
|
if (( pfd - PFD_LOW(0)) > MAX_PFDATS) {
|
|
qprintf("Running out entries pfdat %x pfd_low %x\n",
|
|
pfd, PFD_LOW(0));
|
|
return;
|
|
}
|
|
|
|
p = &rmap_pfdat_stat_buf[pfd - PFD_LOW(0)];
|
|
p->len++;
|
|
if (p->len > p->maxlen) p->maxlen = p->len;
|
|
if (p->maxlen > vnode_max_len) {
|
|
vnode_max_len = p->maxlen;
|
|
max_vnode = pfd->pf_vp;
|
|
}
|
|
}
|
|
|
|
void
|
|
pfdat_del(pfd_t *pfd)
|
|
{
|
|
struct rmap_pfdat_stat *p;
|
|
|
|
if (( pfd - PFD_LOW(0)) > MAX_PFDATS) {
|
|
qprintf("Running out entries pfdat %x pfd_low %x\n",
|
|
pfd, PFD_LOW(0));
|
|
return;
|
|
}
|
|
p = &rmap_pfdat_stat_buf[pfd - PFD_LOW(0)];
|
|
p->len--;
|
|
}
|
|
|
|
|
|
/*
 * idbg_rmap_stats:
 * Kernel debugger helper: print the global rmap add/delete counters
 * (and, when NOTDEF is enabled, the per-pfdat high-water marks).
 * Always returns 0.
 */
int
idbg_rmap_stats()
{
#ifdef NOTDEF
	pfd_t *pfd;
	int i;
#endif

	qprintf("Rmap stats adds 0x%x del 0x%x ladds 0x%x ldels 0x%x\n",
		rmapinfo.rmapadd, rmapinfo.rmapdel, rmapinfo.rmapladd,
		rmapinfo.rmapldel);

#ifdef NOTDEF
	qprintf("System Max rmap len 0x%x vnode 0x%x\n",
		vnode_max_len, max_vnode);
	for (pfd = PFD_LOW(0), i = 0; pfd < PFD_HIGH(0); pfd++, i++)
		if (rmap_pfdat_stat_buf[i].maxlen)
			qprintf("Pfd 0x%x maxlen 0x%x\n",
				pfd, rmap_pfdat_stat_buf[i].maxlen);
#endif
	return 0;
}
|
|
#endif /* RMAP_STATS */
|
|
|
|
|