diff options
Diffstat (limited to 'target/linux/generic/pending-4.4/090-MIPS-c-r4k-Use-IPI-calls-for-CM-indexed-cache-ops.patch')
-rw-r--r-- | target/linux/generic/pending-4.4/090-MIPS-c-r4k-Use-IPI-calls-for-CM-indexed-cache-ops.patch | 317 |
1 files changed, 0 insertions, 317 deletions
diff --git a/target/linux/generic/pending-4.4/090-MIPS-c-r4k-Use-IPI-calls-for-CM-indexed-cache-ops.patch b/target/linux/generic/pending-4.4/090-MIPS-c-r4k-Use-IPI-calls-for-CM-indexed-cache-ops.patch deleted file mode 100644 index 0c1c0a4509..0000000000 --- a/target/linux/generic/pending-4.4/090-MIPS-c-r4k-Use-IPI-calls-for-CM-indexed-cache-ops.patch +++ /dev/null @@ -1,317 +0,0 @@ -From: James Hogan <james.hogan@imgtec.com> -Date: Mon, 25 Jan 2016 21:30:00 +0000 -Subject: [PATCH] MIPS: c-r4k: Use IPI calls for CM indexed cache ops - -The Coherence Manager (CM) can propagate address-based ("hit") cache -operations to other cores in the coherent system, alleviating software -of the need to use IPI calls, however indexed cache operations are not -propagated since doing so makes no sense for separate caches. - -r4k_on_each_cpu() previously had a special case for CONFIG_MIPS_MT_SMP, -intended to avoid the IPIs when the only other CPUs in the system were -other VPEs in the same core, and hence sharing the same caches. This was -changed by commit cccf34e9411c ("MIPS: c-r4k: Fix cache flushing for MT -cores") to apparently handle multi-core multi-VPE systems, but it -focussed mainly on hit cache ops, so the IPI calls were still disabled -entirely for CM systems. - -This doesn't normally cause problems, but tests can be written to hit -these corner cases by using multiple threads, or changing task -affinities to force the process to migrate cores. For example the -failure of mprotect RW->RX to globally sync icaches (via -flush_cache_range) can be detected by modifying and mprotecting a code -page on one core, and migrating to a different core to execute from it. - -Most of the functions called by r4k_on_each_cpu() perform cache -operations exclusively with a single addressing-type (virtual address vs -indexed), so add a type argument and modify the callers to pass in -R4K_USER (user virtual addressing), R4K_KERN (global kernel virtual -addressing) or R4K_INDEX (index into cache). - -local_r4k_flush_icache_range() is split up, to allow it to be called -from the rest of the kernel, or from r4k_flush_icache_range() where it -will choose either indexed or hit cache operations based on the size of -the range and the cache sizes. - -local_r4k_flush_kernel_vmap_range() is split into two functions, each of -which uses cache operations with a single addressing-type, with -r4k_flush_kernel_vmap_range() making the decision whether to use indexed -cache ops or not. - -Signed-off-by: James Hogan <james.hogan@imgtec.com> -Cc: Ralf Baechle <ralf@linux-mips.org> -Cc: Paul Burton <paul.burton@imgtec.com> -Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> -Cc: linux-mips@linux-mips.org ---- - ---- a/arch/mips/mm/c-r4k.c -+++ b/arch/mips/mm/c-r4k.c -@@ -40,6 +40,50 @@ - #include <asm/mips-cm.h> - - /* -+ * Bits describing what cache ops an IPI callback function may perform. -+ * -+ * R4K_USER - Virtual user address based cache operations. -+ * Ineffective on other CPUs. -+ * R4K_KERN - Virtual kernel address based cache operations (including kmap). -+ * Effective on other CPUs. -+ * R4K_INDEX - Index based cache operations. -+ * Effective on other CPUs. -+ */ -+ -+#define R4K_USER BIT(0) -+#define R4K_KERN BIT(1) -+#define R4K_INDEX BIT(2) -+ -+#ifdef CONFIG_SMP -+/* The Coherence manager propagates address-based cache ops to other cores */ -+#define r4k_hit_globalized mips_cm_present() -+#define r4k_index_globalized 0 -+#else -+/* If there's only 1 CPU, then all cache ops are globalized to that 1 CPU */ -+#define r4k_hit_globalized 1 -+#define r4k_index_globalized 1 -+#endif -+ -+/** -+ * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core. -+ * @type: Type of cache operations (R4K_USER, R4K_KERN or R4K_INDEX). -+ * -+ * Returns: 1 if the cache operation @type should be done on every core in -+ * the system. -+ * 0 if the cache operation @type is globalized and only needs to -+ * be performed on a simple CPU. -+ */ -+static inline bool r4k_op_needs_ipi(unsigned int type) -+{ -+ /* -+ * If hardware doesn't globalize the required cache ops we must use IPIs -+ * to do so. -+ */ -+ return (type & R4K_KERN && !r4k_hit_globalized) || -+ (type & R4K_INDEX && !r4k_index_globalized); -+} -+ -+/* - * Special Variant of smp_call_function for use by cache functions: - * - * o No return value -@@ -48,19 +92,11 @@ - * primary cache. - * o doesn't disable interrupts on the local CPU - */ --static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) -+static inline void r4k_on_each_cpu(unsigned int type, -+ void (*func) (void *info), void *info) - { - preempt_disable(); -- -- /* -- * The Coherent Manager propagates address-based cache ops to other -- * cores but not index-based ops. However, r4k_on_each_cpu is used -- * in both cases so there is no easy way to tell what kind of op is -- * executed to the other cores. The best we can probably do is -- * to restrict that call when a CM is not present because both -- * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops. -- */ -- if (!mips_cm_present()) -+ if (r4k_op_needs_ipi(type)) - smp_call_function_many(&cpu_foreign_map, func, info, 1); - func(info); - preempt_enable(); -@@ -456,7 +492,7 @@ static inline void local_r4k___flush_cac - - static void r4k___flush_cache_all(void) - { -- r4k_on_each_cpu(local_r4k___flush_cache_all, NULL); -+ r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL); - } - - static inline int has_valid_asid(const struct mm_struct *mm) -@@ -503,7 +539,7 @@ static void r4k_flush_cache_range(struct - int exec = vma->vm_flags & VM_EXEC; - - if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) -- r4k_on_each_cpu(local_r4k_flush_cache_range, vma); -+ r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma); - } - - static inline void local_r4k_flush_cache_mm(void * args) -@@ -535,7 +571,7 @@ static void r4k_flush_cache_mm(struct mm - if (!cpu_has_dc_aliases) - return; - -- r4k_on_each_cpu(local_r4k_flush_cache_mm, mm); -+ r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm); - } - - struct flush_cache_page_args { -@@ -629,7 +665,7 @@ static void r4k_flush_cache_page(struct - args.addr = addr; - args.pfn = pfn; - -- r4k_on_each_cpu(local_r4k_flush_cache_page, &args); -+ r4k_on_each_cpu(R4K_KERN, local_r4k_flush_cache_page, &args); - } - - static inline void local_r4k_flush_data_cache_page(void * addr) -@@ -642,18 +678,23 @@ static void r4k_flush_data_cache_page(un - if (in_atomic()) - local_r4k_flush_data_cache_page((void *)addr); - else -- r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr); -+ r4k_on_each_cpu(R4K_KERN, local_r4k_flush_data_cache_page, -+ (void *) addr); - } - - struct flush_icache_range_args { - unsigned long start; - unsigned long end; -+ unsigned int type; - }; - --static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end) -+static inline void __local_r4k_flush_icache_range(unsigned long start, -+ unsigned long end, -+ unsigned int type) - { - if (!cpu_has_ic_fills_f_dc) { -- if (end - start >= dcache_size) { -+ if (type == R4K_INDEX || -+ (type & R4K_INDEX && end - start >= dcache_size)) { - r4k_blast_dcache(); - } else { - R4600_HIT_CACHEOP_WAR_IMPL; -@@ -661,7 +702,8 @@ static inline void local_r4k_flush_icach - } - } - -- if (end - start > icache_size) -+ if (type == R4K_INDEX || -+ (type & R4K_INDEX && end - start > icache_size)) - r4k_blast_icache(); - else { - switch (boot_cpu_type()) { -@@ -687,23 +729,59 @@ static inline void local_r4k_flush_icach - #endif - } - -+static inline void local_r4k_flush_icache_range(unsigned long start, -+ unsigned long end) -+{ -+ __local_r4k_flush_icache_range(start, end, R4K_KERN | R4K_INDEX); -+} -+ - static inline void local_r4k_flush_icache_range_ipi(void *args) - { - struct flush_icache_range_args *fir_args = args; - unsigned long start = fir_args->start; - unsigned long end = fir_args->end; -+ unsigned int type = fir_args->type; - -- local_r4k_flush_icache_range(start, end); -+ __local_r4k_flush_icache_range(start, end, type); - } - - static void r4k_flush_icache_range(unsigned long start, unsigned long end) - { - struct flush_icache_range_args args; -+ unsigned long size, cache_size; - - args.start = start; - args.end = end; -+ args.type = R4K_KERN | R4K_INDEX; - -- r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args); -+ if (in_atomic()) { -+ /* -+ * We can't do blocking IPI calls from atomic context, so fall -+ * back to pure address-based cache ops if they globalize. -+ */ -+ if (!r4k_index_globalized && r4k_hit_globalized) { -+ args.type &= ~R4K_INDEX; -+ } else { -+ /* Just do it locally instead. */ -+ local_r4k_flush_icache_range(start, end); -+ instruction_hazard(); -+ return; -+ } -+ } else if (!r4k_index_globalized && r4k_hit_globalized) { -+ /* -+ * If address-based cache ops are globalized, then we may be -+ * able to avoid the IPI for small flushes. -+ */ -+ size = start - end; -+ cache_size = icache_size; -+ if (!cpu_has_ic_fills_f_dc) { -+ size *= 2; -+ cache_size += dcache_size; -+ } -+ if (size <= cache_size) -+ args.type &= ~R4K_INDEX; -+ } -+ r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args); - instruction_hazard(); - } - -@@ -823,7 +901,12 @@ static void local_r4k_flush_cache_sigtra - - static void r4k_flush_cache_sigtramp(unsigned long addr) - { -- r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr); -+ /* -+ * FIXME this is a bit broken when !r4k_hit_globalized, since the user -+ * code probably won't be mapped on other CPUs, so if the process is -+ * migrated, it could end up hitting stale icache lines. -+ */ -+ r4k_on_each_cpu(R4K_USER, local_r4k_flush_cache_sigtramp, (void *)addr); - } - - static void r4k_flush_icache_all(void) -@@ -837,6 +920,15 @@ struct flush_kernel_vmap_range_args { - int size; - }; - -+static inline void local_r4k_flush_kernel_vmap_range_index(void *args) -+{ -+ /* -+ * Aliases only affect the primary caches so don't bother with -+ * S-caches or T-caches. -+ */ -+ r4k_blast_dcache(); -+} -+ - static inline void local_r4k_flush_kernel_vmap_range(void *args) - { - struct flush_kernel_vmap_range_args *vmra = args; -@@ -847,12 +939,8 @@ static inline void local_r4k_flush_kerne - * Aliases only affect the primary caches so don't bother with - * S-caches or T-caches. - */ -- if (cpu_has_safe_index_cacheops && size >= dcache_size) -- r4k_blast_dcache(); -- else { -- R4600_HIT_CACHEOP_WAR_IMPL; -- blast_dcache_range(vaddr, vaddr + size); -- } -+ R4600_HIT_CACHEOP_WAR_IMPL; -+ blast_dcache_range(vaddr, vaddr + size); - } - - static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) -@@ -862,7 +950,12 @@ static void r4k_flush_kernel_vmap_range( - args.vaddr = (unsigned long) vaddr; - args.size = size; - -- r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args); -+ if (cpu_has_safe_index_cacheops && size >= dcache_size) -+ r4k_on_each_cpu(R4K_INDEX, -+ local_r4k_flush_kernel_vmap_range_index, NULL); -+ else -+ r4k_on_each_cpu(R4K_KERN, local_r4k_flush_kernel_vmap_range, -+ &args); - } - - static inline void rm7k_erratum31(void) |