#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/list.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/hypervisor.h>

#include "dom0_ops.h"

#define MAP_CONT    0
#define MAP_DISCONT 1

extern struct list_head * find_direct(struct list_head *, unsigned long);

/*
 * bd240: the functions below set up direct mappings to the real physical
 * pages backing hypervisor-specific structures that dom0 userspace
 * management applications (such as the domain builder) need to access.
 */

#define direct_set_pte(pteptr, pteval) \
    queue_l1_entry_update(__pa(pteptr)|PGREQ_UNCHECKED_UPDATE, (pteval).pte_low)

#define direct_pte_clear(pteptr) \
    queue_l1_entry_update(__pa(pteptr)|PGREQ_UNCHECKED_UPDATE, 0)

#define __direct_pte(x) ((pte_t) { (x) } )
#define __direct_mk_pte(page_nr, pgprot) \
    __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
#define direct_mk_pte_phys(physpage, pgprot) \
    __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)

static inline void forget_pte(pte_t page)
{
    if (!pte_none(page)) {
        printk("forget_pte: old mapping existed!\n");
        BUG();
    }
}

static inline void direct_remappte_range(pte_t * pte, unsigned long address,
                                         unsigned long size,
                                         unsigned long phys_addr, pgprot_t prot)
{
    unsigned long end;

    address &= ~PMD_MASK;
    end = address + size;
    if (end > PMD_SIZE)
        end = PMD_SIZE;
    do {
        pte_t oldpage;
        oldpage = ptep_get_and_clear(pte);
        direct_set_pte(pte, direct_mk_pte_phys(phys_addr, prot));
        forget_pte(oldpage);
        address += PAGE_SIZE;
        phys_addr += PAGE_SIZE;
        pte++;
    } while (address && (address < end));
}

static inline int direct_remappmd_range(struct mm_struct *mm, pmd_t * pmd,
                                        unsigned long address,
                                        unsigned long size,
                                        unsigned long phys_addr, pgprot_t prot)
{
    unsigned long end;

    address &= ~PGDIR_MASK;
    end = address + size;
    if (end > PGDIR_SIZE)
        end = PGDIR_SIZE;
    phys_addr -= address;
    do {
        pte_t * pte = pte_alloc(mm, pmd, address);
        if (!pte)
            return -ENOMEM;
        direct_remappte_range(pte, address, end - address,
                              address + phys_addr, prot);
        address = (address + PMD_SIZE) & PMD_MASK;
        pmd++;
    } while (address && (address < end));
    return 0;
}
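/*
 * Illustration (not part of the driver): for a machine page at physical
 * address 0x12345000 mapped with PAGE_SHARED, direct_mk_pte_phys() expands
 * roughly to
 *
 *     pte_t pte = __direct_pte((0x12345 << PAGE_SHIFT) | pgprot_val(PAGE_SHARED));
 *
 * i.e. the physical frame number in the upper bits and the protection bits
 * in the lower bits.  direct_set_pte() in direct_remappte_range() then hands
 * that raw value to Xen via queue_l1_entry_update() with
 * PGREQ_UNCHECKED_UPDATE set, so the update bypasses the hypervisor's usual
 * page-table validation.
 */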
/*
 * Note: this is only safe if the mm semaphore is held when called.
 */
int direct_remap_page_range(unsigned long from, unsigned long phys_addr,
                            unsigned long size, pgprot_t prot)
{
    int error = 0;
    pgd_t * dir;
    unsigned long beg = from;
    unsigned long end = from + size;
    struct mm_struct *mm = current->mm;

    phys_addr -= from;
    dir = pgd_offset(mm, from);
    flush_cache_range(mm, beg, end);
    if (from >= end)
        BUG();

    spin_lock(&mm->page_table_lock);
    do {
        pmd_t *pmd = pmd_alloc(mm, dir, from);
        error = -ENOMEM;
        if (!pmd)
            break;
        error = direct_remappmd_range(mm, pmd, from, end - from,
                                      phys_addr + from, prot);
        if (error)
            break;
        from = (from + PGDIR_SIZE) & PGDIR_MASK;
        dir++;
    } while (from && (from < end));
    spin_unlock(&mm->page_table_lock);
    flush_tlb_range(mm, beg, end);
    return error;
}

/*
 * Used for remapping discontiguous pieces of a domain's memory.  The pages
 * to map are read from the frame table, starting at the given first_pg
 * index.
 */
int direct_remap_disc_page_range(unsigned long from, unsigned long first_pg,
                                 int tot_pages, pgprot_t prot)
{
    dom0_op_t dom0_op;
    unsigned long *pfns = (unsigned long *)get_free_page(GFP_KERNEL);
    unsigned long start = from;
    int pages, i;

    while ( tot_pages != 0 ) {
        dom0_op.cmd = DOM0_GETMEMLIST;
        dom0_op.u.getmemlist.start_pfn = first_pg;
        pages = 1023;
        dom0_op.u.getmemlist.num_pfns = 1024;
        if ( tot_pages < 1024 )
            dom0_op.u.getmemlist.num_pfns = pages = tot_pages;
        dom0_op.u.getmemlist.buffer = pfns;
        (void)HYPERVISOR_dom0_op(&dom0_op);
        first_pg = pfns[1023];

        for ( i = 0; i < pages; i++ ) {
            if (direct_remap_page_range(start, pfns[i] << PAGE_SHIFT,
                                        PAGE_SIZE, prot))
                goto out;
            start += PAGE_SIZE;
            tot_pages--;
        }
    }

out:
    free_page((unsigned long)pfns);
    return tot_pages;
}
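/*
 * Worked example of the batching above (illustrative only): with
 * tot_pages == 2500 the loop issues three DOM0_GETMEMLIST calls.  A full
 * batch asks the hypervisor for 1024 pfns but maps only the first 1023;
 * the last entry, pfns[1023], is not mapped in that batch and instead
 * becomes the start_pfn from which the next call continues walking the
 * frame table:
 *
 *     batch 1: start_pfn = first_pg,   maps pfns[0..1022], 1477 pages left
 *     batch 2: start_pfn = pfns[1023], maps pfns[0..1022],  454 pages left
 *     batch 3: start_pfn = pfns[1023], num_pfns = 454, maps pfns[0..453]
 *
 * On success the function returns 0; on failure it returns the number of
 * pages that were not mapped.
 */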
/*
 * The functions below replace the standard sys_mmap and sys_munmap, which
 * are absolutely useless for direct memory mapping.  The direct_zap*
 * functions are minor amendments of the originals in mm/memory.c; the
 * changes enable unmapping of real physical addresses.
 */

unsigned long direct_mmap(unsigned long phys_addr, unsigned long size,
                          pgprot_t prot, int flag, int tot_pages)
{
    direct_mmap_node_t * dmmap;
    struct list_head * entry;
    unsigned long addr;
    int ret = 0;

    if (!capable(CAP_SYS_ADMIN)) {
        ret = -EPERM;
        goto out;
    }

    /* get_unmapped_area invokes the Xen-specific arch_get_unmapped_area */
    addr = get_unmapped_area(NULL, 0, size, 0, 0);
    if (addr & ~PAGE_MASK) {
        ret = -ENOMEM;
        goto out;
    }

    /*
     * Add a node to the list of directly mapped areas, making sure the
     * list remains sorted.
     */
    dmmap = (direct_mmap_node_t *)kmalloc(sizeof(direct_mmap_node_t),
                                          GFP_KERNEL);
    if (dmmap == NULL) {
        ret = -ENOMEM;
        goto out;
    }
    dmmap->vm_start = addr;
    dmmap->vm_end = addr + size;
    entry = find_direct(&current->mm->context.direct_list, addr);
    if (entry != &current->mm->context.direct_list) {
        list_add_tail(&dmmap->list, entry);
    } else {
        list_add_tail(&dmmap->list, &current->mm->context.direct_list);
    }

    /* and perform the mapping */
    if (flag == MAP_DISCONT) {
        ret = direct_remap_disc_page_range(addr, phys_addr >> PAGE_SHIFT,
                                           tot_pages, prot);
    } else {
        ret = direct_remap_page_range(addr, phys_addr, size, prot);
    }

    if (ret == 0)
        ret = addr;

out:
    return ret;
}
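/*
 * find_direct() is only declared in this file; its definition lives with
 * the Xen-specific arch_get_unmapped_area() code.  A minimal sketch of the
 * contract direct_mmap() above relies on (illustrative, not the real
 * definition): walk the sorted direct_list and return the first node that
 * starts at or above addr, or the list head itself if there is none, so
 * that list_add_tail() inserts the new node just before it and the list
 * stays sorted by vm_start:
 *
 *     struct list_head *find_direct(struct list_head *list, unsigned long addr)
 *     {
 *         struct list_head *curr;
 *         direct_mmap_node_t *node;
 *
 *         list_for_each(curr, list) {
 *             node = list_entry(curr, direct_mmap_node_t, list);
 *             if (node->vm_start >= addr)
 *                 return curr;
 *         }
 *         return list;
 *     }
 */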
/*
 * Most of the checks, refcount updates and cache handling have been thrown
 * out as they are not needed here.
 */
static inline int direct_zap_pte_range(mmu_gather_t *tlb, pmd_t * pmd,
                                       unsigned long address,
                                       unsigned long size)
{
    unsigned long offset;
    pte_t * ptep;
    int freed = 0;

    if (pmd_none(*pmd))
        return 0;
    if (pmd_bad(*pmd)) {
        pmd_ERROR(*pmd);
        pmd_clear(pmd);
        return 0;
    }
    ptep = pte_offset(pmd, address);
    offset = address & ~PMD_MASK;
    if (offset + size > PMD_SIZE)
        size = PMD_SIZE - offset;
    size &= PAGE_MASK;
    for (offset = 0; offset < size; ptep++, offset += PAGE_SIZE) {
        pte_t pte = *ptep;
        if (pte_none(pte))
            continue;
        freed++;
        direct_pte_clear(ptep);
    }
    return freed;
}

static inline int direct_zap_pmd_range(mmu_gather_t *tlb, pgd_t * dir,
                                       unsigned long address,
                                       unsigned long size)
{
    pmd_t * pmd;
    unsigned long end;
    int freed;

    if (pgd_none(*dir))
        return 0;
    if (pgd_bad(*dir)) {
        pgd_ERROR(*dir);
        pgd_clear(dir);
        return 0;
    }
    pmd = pmd_offset(dir, address);
    end = address + size;
    if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
        end = ((address + PGDIR_SIZE) & PGDIR_MASK);
    freed = 0;
    do {
        freed += direct_zap_pte_range(tlb, pmd, address, end - address);
        address = (address + PMD_SIZE) & PMD_MASK;
        pmd++;
    } while (address < end);
    return freed;
}

/*
 * Remove user pages in a given range.
 */
void direct_zap_page_range(struct mm_struct *mm, unsigned long address,
                           unsigned long size)
{
    mmu_gather_t *tlb;
    pgd_t * dir;
    unsigned long start = address, end = address + size;
    int freed = 0;

    dir = pgd_offset(mm, address);

    /*
     * This is a long-lived spinlock.  That's fine.
     * There's no contention, because the page table
     * lock only protects against kswapd anyway, and
     * even if kswapd happened to be looking at this
     * process we _want_ it to get stuck.
     */
    if (address >= end)
        BUG();
    spin_lock(&mm->page_table_lock);
    flush_cache_range(mm, address, end);
    tlb = tlb_gather_mmu(mm);

    do {
        freed += direct_zap_pmd_range(tlb, dir, address, end - address);
        address = (address + PGDIR_SIZE) & PGDIR_MASK;
        dir++;
    } while (address && (address < end));

    /* this will flush any remaining tlb entries */
    tlb_finish_mmu(tlb, start, end);

    /* decrementing rss removed */

    spin_unlock(&mm->page_table_lock);
}

int direct_unmap(unsigned long addr, unsigned long size)
{
    direct_mmap_node_t * node;
    struct list_head * curr;
    struct list_head * direct_list = &current->mm->context.direct_list;

    curr = direct_list->next;
    while (curr != direct_list) {
        node = list_entry(curr, direct_mmap_node_t, list);
        if (node->vm_start == addr)
            break;
        curr = curr->next;
    }

    if (curr == direct_list)
        return -1;

    list_del(&node->list);
    kfree(node);

    direct_zap_page_range(current->mm, addr, size);

    return 0;
}

int direct_disc_unmap(unsigned long from, unsigned long first_pg,
                      int tot_pages)
{
    int count = 0;
    direct_mmap_node_t * node;
    struct list_head * curr;
    struct list_head * direct_list = &current->mm->context.direct_list;

    curr = direct_list->next;
    while (curr != direct_list) {
        node = list_entry(curr, direct_mmap_node_t, list);
        if (node->vm_start == from)
            break;
        curr = curr->next;
    }

    if (curr == direct_list)
        return -1;

    list_del(&node->list);
    kfree(node);

    while (count < tot_pages) {
        direct_zap_page_range(current->mm, from, PAGE_SIZE);
        from += PAGE_SIZE;
        count++;
    }

    return 0;
}
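/*
 * Illustrative call pattern (hypothetical caller, e.g. an ioctl handler in
 * the dom0 control driver; the error check is simplified, errors come back
 * as small negative values):
 *
 *     unsigned long addr;
 *
 *     addr = direct_mmap(machine_addr, num_pages << PAGE_SHIFT,
 *                        PAGE_SHARED, MAP_DISCONT, num_pages);
 *     if ((long)addr < 0)
 *         return (long)addr;
 *     ...
 *     direct_disc_unmap(addr, machine_addr >> PAGE_SHIFT, num_pages);
 *
 * Contiguous regions use MAP_CONT, which maps via direct_remap_page_range()
 * underneath, and are torn down with direct_unmap(addr, size).
 */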